# 2025 DL Lab8: RL Assignment_Super Mario World

**Your Answer:**    
Hi I'm XXX, XXXXXXXXXX.

## Overview
This project implements a **Deep Reinforcement Learning** pipeline to train an autonomous agent for Super Mario World. Leveraging the **Proximal Policy Optimization (PPO)** algorithm, the system interacts with the **stable-retro** environment to master the YoshiIsland1 level. Key components include a custom Vision Backbone for extracting features from raw pixel data and a suite of Environment Wrappers that handle frame preprocessing, action discretization, and reward shaping to facilitate efficient learning.

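Implement the reward function.  
The agent must handle the very beginning of the level, where an enemy attacks early on.  
Implement a custom PPO.  
Pretrained weights make little difference; the reward function is what matters most.  
Model weight file size limit: 1 GB.  
Do not rename the existing classes (new classes may be added, but the original ones must stay untouched).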

## Imports

In [1]:
import os
import numpy as np
import retro
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.vec_env import VecNormalize

from eval import evaluate_policy, record_video
from custom_policy import VisionBackbonePolicy, CustomPPO

  from .autonotebook import tqdm as notebook_tqdm


## Configuration

In [2]:
# --- Game selection ---
GAME = "SuperMarioWorld-Snes"
STATE = "YoshiIsland1"

# --- Training budget ---
# Training proceeds in fixed-size chunks; the total budget is expressed as a
# whole number of chunks.
TRAIN_CHUNK = 1 << 18             # 262,144 steps per chunk
# Earlier budgets: 40 chunks (10,485,760 steps) and 80 chunks (20,971,520 steps).
TOTAL_STEPS = 200 * TRAIN_CHUNK   # 52,428,800 steps overall
N_ENVS = 16                       # number of parallel environments

# --- Evaluation & recording ---
EVAL_EPISODES = 3        # episodes per evaluation pass
EVAL_MAX_STEPS = 18000   # step cap handed to the evaluator
RECORD_STEPS = 1800      # length of each recorded video, in steps

# --- Output locations (all rooted at LOG_DIR) ---
LOG_DIR = "./runs_smw"
VIDEO_DIR, CKPT_DIR, TENSORBOARD_LOG = (
    os.path.join(LOG_DIR, sub) for sub in ("videos", "checkpoints", "tb")
)

# TENSORBOARD_LOG is intentionally not created here; only these three are.
for _out_dir in (LOG_DIR, CKPT_DIR, VIDEO_DIR):
    os.makedirs(_out_dir, exist_ok=True)

## Environment Functions

In [3]:
from wrappers import make_base_env
def _make_env_thunk(game: str, state: str):
    """Wrap environment construction in a zero-argument factory.

    The vectorized-env classes expect a list of callables rather than ready
    environments, so construction is deferred until the factory is invoked.

    Args:
        game: Game identifier forwarded to ``make_base_env``.
        state: Save-state / level name forwarded to ``make_base_env``.

    Returns:
        A callable taking no arguments that returns
        ``make_base_env(game, state)``.
    """
    # Nothing is built here; `game` and `state` are simply captured.
    return lambda: make_base_env(game, state)

def make_vec_env(game: str, state: str, n_envs: int, use_subproc: bool = True):
    """Build a vectorized environment holding ``n_envs`` copies of the game.

    Args:
        game: Game identifier passed to every environment factory.
        state: Save-state / level name passed to every environment factory.
        n_envs: How many environment factories to create.
        use_subproc: When true and ``n_envs > 1``, wrap the factories in
            ``SubprocVecEnv``; in every other case ``DummyVecEnv`` is used.

    Returns:
        The constructed ``SubprocVecEnv`` or ``DummyVecEnv`` instance.
    """
    factories = []
    for _ in range(n_envs):
        factories.append(_make_env_thunk(game, state))

    # Subprocess workers are only chosen when requested AND there is more
    # than one environment to run.
    wants_workers = use_subproc and n_envs > 1
    vec_cls = SubprocVecEnv if wants_workers else DummyVecEnv
    return vec_cls(factories)

## Initialize Env & Model

In [4]:
# 1. Build the vectorized training environment.
train_env = make_vec_env(GAME, STATE, n_envs=N_ENVS)
# Observation/reward normalization is currently left disabled:
# train_env = VecNormalize(train_env, norm_obs=True, norm_reward=True, clip_obs=10., clip_reward=10.)
print(f"Environment created: {GAME} - {STATE} with {N_ENVS} parallel envs.")

# Checkpoint to resume from. Any path that does not exist (the author used
# the literal string "None") makes the cell fall through to a fresh model.
# Author's note attached to the "None" option: 6225920 (19) -- "has damage"
# (meaning unclear; TODO confirm with the author).
checkpoint_path = "runs_smw/preserved/Enc5_67.zip"

# Bookkeeping defaults; `trained` and `round_idx` are replaced below when a
# checkpoint is successfully loaded.
best_mean = -1e18
trained = 0
round_idx = 0

# 2. Create a new model, or resume from the checkpoint if the file is present.
if not os.path.exists(checkpoint_path):
    print(f"[Fail] Can't load {checkpoint_path}. Will use new model")
    model = CustomPPO(
        VisionBackbonePolicy,
        train_env,
        policy_kwargs={"normalize_images": False},
        # Rollout / optimisation schedule
        n_steps=512,
        batch_size=512,
        n_epochs=4,
        learning_rate=1e-4,
        # Objective coefficients
        gamma=0.96875,
        clip_range=0.125,
        ent_coef=0.0375,
        kl_coef=1,
        # Logging
        verbose=1,
        tensorboard_log=TENSORBOARD_LOG,
    )
else:
    # Load the existing model; the device is pinned so the GPU is used.
    model = CustomPPO.load(checkpoint_path, env=train_env, device="cuda:0")
    # Continue counting from where the checkpoint stopped.
    trained = model.num_timesteps
    round_idx = trained // TRAIN_CHUNK
    print(f"[Sucess] Loaded model from {checkpoint_path}")
    print(f"trained: {trained}, round_index: {round_idx}")

Environment created: SuperMarioWorld-Snes - YoshiIsland1 with 16 parallel envs.
[Sucess] Loaded model from runs_smw/preserved/Enc5_67.zip
trained: 17825792, round_index: 68


In [5]:
# model.save("policy")

In [6]:
# import os
# import glob
# from custom_policy import CustomPPO
# from eval import record_video  # 確保 eval.py 在同一目錄下

# # ================= 設定區 =================
# target_numbers = [59, 63, 67]

# # ================= 執行迴圈 =================
# print(f"準備測試以下 Checkpoints: {target_numbers}")

# for num in target_numbers:
#     model_path = os.path.join(CKPT_DIR, f"Enc5_{num}.zip")
    
#     # 檢查檔案是否存在
#     if not os.path.exists(model_path):
#         print(f"⚠️ 找不到檔案: {model_path}，跳過。")
#         continue
    
#     print(f"\n[{num}] 正在載入模型: {model_path} ...")
    
#     try:
#         # 1. 載入模型 (不需要 env 參數也能載入權重)
#         # 如果你有改過 CustomPPO 的參數，load 會自動讀取 zip 裡的設定
#         model = CustomPPO.load(model_path, device="auto") # device="auto" 會自動用 GPU
        
#         # 2. 錄製影片
#         prefix_name = f"test_{num}"
#         print(f"[{num}] 正在錄影 (長度 {RECORD_STEPS} steps)...")
        
#         record_video(
#             model=model,
#             game=GAME,
#             state=STATE,
#             out_dir=VIDEO_DIR,
#             video_len=RECORD_STEPS,
#             prefix=prefix_name
#         )
#         print(f"✅ 完成！影片已儲存為 {prefix_name}.mp4")
        
#     except Exception as e:
#         print(f"❌ 發生錯誤 (Model: {num}): {e}")

# print("\n所有測試結束。")

## Training Loop

In [None]:
try:
    # Each pass trains one chunk, then checkpoints, evaluates and records.
    while trained < TOTAL_STEPS:
        round_idx += 1
        label = "Enc5"
        # The tag is computed from the step count *before* this round's
        # training, so the saved file index is one behind `round_idx`.
        tagged_label = f"{label}_{trained // TRAIN_CHUNK}"
        # The last round may be shorter than a full chunk.
        remaining = TOTAL_STEPS - trained
        chunk = min(TRAIN_CHUNK, remaining)
        # chunk = 2000

        print(f"\n=== Round {round_idx} | Learn {chunk} steps (Total trained: {trained}) ===")

        # --- Train (step counter keeps accumulating across rounds) ---
        model.learn(
            total_timesteps=chunk,
            reset_num_timesteps=False,
            tb_log_name=label,
        )
        trained += chunk

        # --- Save checkpoint ---
        ckpt_path = os.path.join(CKPT_DIR, f"{tagged_label}.zip")
        model.save(ckpt_path)
        print(f"Saved checkpoint: {ckpt_path}")

        # --- Evaluate ---
        eval_result = evaluate_policy(
            model, GAME, STATE,
            n_episodes=EVAL_EPISODES,
            max_steps=EVAL_MAX_STEPS,
        )
        mean_ret, best_ret = eval_result
        print(f"[EVAL] Mean Return: {mean_ret:.3f}, Best Return: {best_ret:.3f}")

        # --- Save best model (disabled) ---
        # if mean_ret > best_mean:
        #     best_mean = mean_ret
        #     best_path = os.path.join(LOG_DIR, "best_model.zip")
        #     model.save(best_path)
        #     print(f"New best record. Saved to {best_path}")

        # --- Record video ---
        # Videos go into a per-label subfolder; the mean return is embedded
        # in the file name.
        out_path = os.path.join(VIDEO_DIR, label)
        os.makedirs(out_path, exist_ok=True)
        video_prefix = f"{label}/{tagged_label}_{mean_ret:.2f}"
        record_video(
            model, GAME, STATE, VIDEO_DIR,
            video_len=RECORD_STEPS,
            prefix=video_prefix,
        )

except KeyboardInterrupt:
    # Stopping the cell by hand is an expected way to end training.
    print("\nTraining interrupted manually.")

finally:
    # Runs on normal completion, interruption and errors alike.
    train_env.close()
    print("Training finished. Environment closed.")
    
"""
tensorboard --logdir=./runs_smw/tb
"""


=== Round 69 | Learn 262144 steps (Total trained: 17825792) ===
Logging to ./runs_smw/tb/Enc5_0


---------------------------------
| time/              |          |
|    fps             | 1074     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 17833984 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 894         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 17842176    |
| train/                |             |
|    approx_kl          | 0.013940858 |
|    entropy_loss       | -1.62       |
|    explained_variance | 0.927       |
|    learning_rate      | 0.0001      |
|    loss               | 0.367       |
|    mean_step_reward   | 0.28193492  |
|    n_updates          | 8708        |
|    policyGradLoss     | 0.00121     |
|    value_loss         | 1.4         |
---------------------------------------
---------------------------------------
| time/                 |             |
| 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_68.zip
[EVAL] Mean Return: 520.960, Best Return: 525.627
Saved video to ./runs_smw/videos/Enc5/Enc5_68_520.96.mp4

=== Round 70 | Learn 262144 steps (Total trained: 18087936) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1106     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 18096128 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 894         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 18104320    |
| train/                |             |
|    approx_kl          | 0.011413626 |
|    entropy_loss       | -1.7        |
|    explained_variance | 0.951       |
|    learning_rate      | 0.0001      |
|    loss               | 1.09        |
|    mean_step_reward   |

Saved checkpoint: ./runs_smw/checkpoints/Enc5_69.zip
[EVAL] Mean Return: 524.401, Best Return: 530.401
Saved video to ./runs_smw/videos/Enc5/Enc5_69_524.40.mp4

=== Round 71 | Learn 262144 steps (Total trained: 18350080) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1131     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 18358272 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 921         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 18366464    |
| train/                |             |
|    approx_kl          | 0.009198898 |
|    entropy_loss       | -1.66       |
|    explained_variance | 0.965       |
|    learning_rate      | 0.0001      |
|    loss               | 0.832       |
|    mean_step_reward   |

Saved checkpoint: ./runs_smw/checkpoints/Enc5_70.zip
[EVAL] Mean Return: 49.000, Best Return: 49.667
Saved video to ./runs_smw/videos/Enc5/Enc5_70_49.00.mp4

=== Round 72 | Learn 262144 steps (Total trained: 18612224) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1159     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 18620416 |
---------------------------------
--------------------------------------
| time/                 |            |
|    fps                | 902        |
|    iterations         | 2          |
|    time_elapsed       | 18         |
|    total_timesteps    | 18628608   |
| train/                |            |
|    approx_kl          | 0.00921548 |
|    entropy_loss       | -1.71      |
|    explained_variance | 0.954      |
|    learning_rate      | 0.0001     |
|    loss               | 0.53       |
|    mean_step_reward   | 0.32696962 |
|

Saved checkpoint: ./runs_smw/checkpoints/Enc5_71.zip
[EVAL] Mean Return: 94.215, Best Return: 95.548
Saved video to ./runs_smw/videos/Enc5/Enc5_71_94.22.mp4

=== Round 73 | Learn 262144 steps (Total trained: 18874368) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1120     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 18882560 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 902         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 18890752    |
| train/                |             |
|    approx_kl          | 0.011234842 |
|    entropy_loss       | -1.76       |
|    explained_variance | 0.977       |
|    learning_rate      | 0.0001      |
|    loss               | 0.565       |
|    mean_step_reward   | 0.

Saved checkpoint: ./runs_smw/checkpoints/Enc5_72.zip
[EVAL] Mean Return: 527.612, Best Return: 532.279
Saved video to ./runs_smw/videos/Enc5/Enc5_72_527.61.mp4

=== Round 74 | Learn 262144 steps (Total trained: 19136512) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1137     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 19144704 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 910         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 19152896    |
| train/                |             |
|    approx_kl          | 0.011578826 |
|    entropy_loss       | -1.7        |
|    explained_variance | 0.987       |
|    learning_rate      | 0.0001      |
|    loss               | 0.378       |
|    mean_step_reward   |

Saved checkpoint: ./runs_smw/checkpoints/Enc5_73.zip
[EVAL] Mean Return: 530.211, Best Return: 536.211
Saved video to ./runs_smw/videos/Enc5/Enc5_73_530.21.mp4

=== Round 75 | Learn 262144 steps (Total trained: 19398656) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1147     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 19406848 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 926         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 19415040    |
| train/                |             |
|    approx_kl          | 0.014457919 |
|    entropy_loss       | -1.65       |
|    explained_variance | 0.972       |
|    learning_rate      | 0.0001      |
|    loss               | 0.573       |
|    mean_step_reward   |

Saved checkpoint: ./runs_smw/checkpoints/Enc5_74.zip
[EVAL] Mean Return: 525.601, Best Return: 529.601
Saved video to ./runs_smw/videos/Enc5/Enc5_74_525.60.mp4

=== Round 76 | Learn 262144 steps (Total trained: 19660800) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1137     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 19668992 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 914         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 19677184    |
| train/                |             |
|    approx_kl          | 0.015082724 |
|    entropy_loss       | -1.68       |
|    explained_variance | 0.987       |
|    learning_rate      | 0.0001      |
|    loss               | 0.378       |
|    mean_step_reward   |

Saved checkpoint: ./runs_smw/checkpoints/Enc5_75.zip
[EVAL] Mean Return: 409.562, Best Return: 414.228
Saved video to ./runs_smw/videos/Enc5/Enc5_75_409.56.mp4

=== Round 77 | Learn 262144 steps (Total trained: 19922944) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1134     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 19931136 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 907         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 19939328    |
| train/                |             |
|    approx_kl          | 0.009883723 |
|    entropy_loss       | -1.7        |
|    explained_variance | 0.976       |
|    learning_rate      | 0.0001      |
|    loss               | 0.599       |
|    mean_step_reward   |

Saved checkpoint: ./runs_smw/checkpoints/Enc5_76.zip
[EVAL] Mean Return: 471.768, Best Return: 474.435
Saved video to ./runs_smw/videos/Enc5/Enc5_76_471.77.mp4

=== Round 78 | Learn 262144 steps (Total trained: 20185088) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1136     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 20193280 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 922         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 20201472    |
| train/                |             |
|    approx_kl          | 0.009860938 |
|    entropy_loss       | -1.69       |
|    explained_variance | 0.981       |
|    learning_rate      | 0.0001      |
|    loss               | 0.48        |
|    mean_step_reward   |

Saved checkpoint: ./runs_smw/checkpoints/Enc5_77.zip
[EVAL] Mean Return: 49.050, Best Return: 49.717
Saved video to ./runs_smw/videos/Enc5/Enc5_77_49.05.mp4

=== Round 79 | Learn 262144 steps (Total trained: 20447232) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1090     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 20455424 |
---------------------------------
--------------------------------------
| time/                 |            |
|    fps                | 900        |
|    iterations         | 2          |
|    time_elapsed       | 18         |
|    total_timesteps    | 20463616   |
| train/                |            |
|    approx_kl          | 0.01050523 |
|    entropy_loss       | -1.67      |
|    explained_variance | 0.984      |
|    learning_rate      | 0.0001     |
|    loss               | 0.768      |
|    mean_step_reward   | 0.3575962  |
|

Saved checkpoint: ./runs_smw/checkpoints/Enc5_78.zip
[EVAL] Mean Return: 522.132, Best Return: 527.465
Saved video to ./runs_smw/videos/Enc5/Enc5_78_522.13.mp4

=== Round 80 | Learn 262144 steps (Total trained: 20709376) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1119     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 20717568 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 924         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 20725760    |
| train/                |             |
|    approx_kl          | 0.010909084 |
|    entropy_loss       | -1.67       |
|    explained_variance | 0.972       |
|    learning_rate      | 0.0001      |
|    loss               | 0.219       |
|    mean_step_reward   |

Saved checkpoint: ./runs_smw/checkpoints/Enc5_79.zip
[EVAL] Mean Return: 517.467, Best Return: 524.133
Saved video to ./runs_smw/videos/Enc5/Enc5_79_517.47.mp4

=== Round 81 | Learn 262144 steps (Total trained: 20971520) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1096     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 20979712 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 898         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 20987904    |
| train/                |             |
|    approx_kl          | 0.008871691 |
|    entropy_loss       | -1.69       |
|    explained_variance | 0.983       |
|    learning_rate      | 0.0001      |
|    loss               | 0.587       |
|    mean_step_reward   |

Saved checkpoint: ./runs_smw/checkpoints/Enc5_80.zip
[EVAL] Mean Return: 515.396, Best Return: 524.396
Saved video to ./runs_smw/videos/Enc5/Enc5_80_515.40.mp4

=== Round 82 | Learn 262144 steps (Total trained: 21233664) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1158     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 21241856 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 923         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 21250048    |
| train/                |             |
|    approx_kl          | 0.016539535 |
|    entropy_loss       | -1.65       |
|    explained_variance | 0.955       |
|    learning_rate      | 0.0001      |
|    loss               | 0.741       |
|    mean_step_reward   |

Saved checkpoint: ./runs_smw/checkpoints/Enc5_81.zip
[EVAL] Mean Return: 527.530, Best Return: 536.196
Saved video to ./runs_smw/videos/Enc5/Enc5_81_527.53.mp4

=== Round 83 | Learn 262144 steps (Total trained: 21495808) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1132     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 21504000 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 918         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 21512192    |
| train/                |             |
|    approx_kl          | 0.013084386 |
|    entropy_loss       | -1.75       |
|    explained_variance | 0.964       |
|    learning_rate      | 0.0001      |
|    loss               | 1.08        |
|    mean_step_reward   |

Saved checkpoint: ./runs_smw/checkpoints/Enc5_82.zip
[EVAL] Mean Return: 527.822, Best Return: 535.822
Saved video to ./runs_smw/videos/Enc5/Enc5_82_527.82.mp4

=== Round 84 | Learn 262144 steps (Total trained: 21757952) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1126     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 21766144 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 923         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 21774336    |
| train/                |             |
|    approx_kl          | 0.023399739 |
|    entropy_loss       | -1.68       |
|    explained_variance | 0.955       |
|    learning_rate      | 0.0001      |
|    loss               | 1.32        |
|    mean_step_reward   |

Saved checkpoint: ./runs_smw/checkpoints/Enc5_83.zip
[EVAL] Mean Return: 71.849, Best Return: 73.182
Saved video to ./runs_smw/videos/Enc5/Enc5_83_71.85.mp4

=== Round 85 | Learn 262144 steps (Total trained: 22020096) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1126     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 22028288 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 921         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 22036480    |
| train/                |             |
|    approx_kl          | 0.013919337 |
|    entropy_loss       | -1.69       |
|    explained_variance | 0.98        |
|    learning_rate      | 0.0001      |
|    loss               | 0.308       |
|    mean_step_reward   | 0.

Saved checkpoint: ./runs_smw/checkpoints/Enc5_84.zip
[EVAL] Mean Return: 409.886, Best Return: 415.219
Saved video to ./runs_smw/videos/Enc5/Enc5_84_409.89.mp4

=== Round 86 | Learn 262144 steps (Total trained: 22282240) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1137     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 22290432 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 920         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 22298624    |
| train/                |             |
|    approx_kl          | 0.012365309 |
|    entropy_loss       | -1.7        |
|    explained_variance | 0.984       |
|    learning_rate      | 0.0001      |
|    loss               | 0.549       |
|    mean_step_reward   |

Saved checkpoint: ./runs_smw/checkpoints/Enc5_85.zip
[EVAL] Mean Return: 532.862, Best Return: 540.862
Saved video to ./runs_smw/videos/Enc5/Enc5_85_532.86.mp4

=== Round 87 | Learn 262144 steps (Total trained: 22544384) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1092     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 22552576 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 901         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 22560768    |
| train/                |             |
|    approx_kl          | 0.010254516 |
|    entropy_loss       | -1.72       |
|    explained_variance | 0.981       |
|    learning_rate      | 0.0001      |
|    loss               | 1.13        |
|    mean_step_reward   |

Saved checkpoint: ./runs_smw/checkpoints/Enc5_86.zip
[EVAL] Mean Return: 526.936, Best Return: 533.602
Saved video to ./runs_smw/videos/Enc5/Enc5_86_526.94.mp4

=== Round 88 | Learn 262144 steps (Total trained: 22806528) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1154     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 22814720 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 941         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 22822912    |
| train/                |             |
|    approx_kl          | 0.011648541 |
|    entropy_loss       | -1.69       |
|    explained_variance | 0.987       |
|    learning_rate      | 0.0001      |
|    loss               | 0.193       |
|    mean_step_reward   |

Saved checkpoint: ./runs_smw/checkpoints/Enc5_87.zip
[EVAL] Mean Return: 529.723, Best Return: 537.723
Saved video to ./runs_smw/videos/Enc5/Enc5_87_529.72.mp4

=== Round 89 | Learn 262144 steps (Total trained: 23068672) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1122     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 23076864 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 923         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 23085056    |
| train/                |             |
|    approx_kl          | 0.015934218 |
|    entropy_loss       | -1.69       |
|    explained_variance | 0.945       |
|    learning_rate      | 0.0001      |
|    loss               | 0.512       |
|    mean_step_reward   |

Saved checkpoint: ./runs_smw/checkpoints/Enc5_88.zip
[EVAL] Mean Return: 526.996, Best Return: 534.330
Saved video to ./runs_smw/videos/Enc5/Enc5_88_527.00.mp4

=== Round 90 | Learn 262144 steps (Total trained: 23330816) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1154     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 23339008 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 926         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 23347200    |
| train/                |             |
|    approx_kl          | 0.010986377 |
|    entropy_loss       | -1.69       |
|    explained_variance | 0.983       |
|    learning_rate      | 0.0001      |
|    loss               | 0.362       |
|    mean_step_reward   |

Saved checkpoint: ./runs_smw/checkpoints/Enc5_89.zip
[EVAL] Mean Return: 345.122, Best Return: 349.789
Saved video to ./runs_smw/videos/Enc5/Enc5_89_345.12.mp4

=== Round 91 | Learn 262144 steps (Total trained: 23592960) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1098     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 23601152 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 911         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 23609344    |
| train/                |             |
|    approx_kl          | 0.010429395 |
|    entropy_loss       | -1.69       |
|    explained_variance | 0.954       |
|    learning_rate      | 0.0001      |
|    loss               | 0.398       |
|    mean_step_reward   |

Saved checkpoint: ./runs_smw/checkpoints/Enc5_90.zip
[EVAL] Mean Return: 389.660, Best Return: 395.660
Saved video to ./runs_smw/videos/Enc5/Enc5_90_389.66.mp4

=== Round 92 | Learn 262144 steps (Total trained: 23855104) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1110     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 23863296 |
---------------------------------
--------------------------------------
| time/                 |            |
|    fps                | 915        |
|    iterations         | 2          |
|    time_elapsed       | 17         |
|    total_timesteps    | 23871488   |
| train/                |            |
|    approx_kl          | 0.01068051 |
|    entropy_loss       | -1.76      |
|    explained_variance | 0.956      |
|    learning_rate      | 0.0001     |
|    loss               | 0.323      |
|    mean_step_reward   | 0.3038082  

Saved checkpoint: ./runs_smw/checkpoints/Enc5_91.zip
[EVAL] Mean Return: 527.492, Best Return: 534.826
Saved video to ./runs_smw/videos/Enc5/Enc5_91_527.49.mp4

=== Round 93 | Learn 262144 steps (Total trained: 24117248) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1110     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 24125440 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 903         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 24133632    |
| train/                |             |
|    approx_kl          | 0.013272627 |
|    entropy_loss       | -1.71       |
|    explained_variance | 0.982       |
|    learning_rate      | 0.0001      |
|    loss               | 0.536       |
|    mean_step_reward   |

Saved checkpoint: ./runs_smw/checkpoints/Enc5_92.zip
[EVAL] Mean Return: 527.336, Best Return: 533.336
Saved video to ./runs_smw/videos/Enc5/Enc5_92_527.34.mp4

=== Round 94 | Learn 262144 steps (Total trained: 24379392) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1172     |
|    iterations      | 1        |
|    time_elapsed    | 6        |
|    total_timesteps | 24387584 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 922         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 24395776    |
| train/                |             |
|    approx_kl          | 0.010383885 |
|    entropy_loss       | -1.69       |
|    explained_variance | 0.981       |
|    learning_rate      | 0.0001      |
|    loss               | 0.276       |
|    mean_step_reward   |

Saved checkpoint: ./runs_smw/checkpoints/Enc5_93.zip
[EVAL] Mean Return: 53.854, Best Return: 54.521
Saved video to ./runs_smw/videos/Enc5/Enc5_93_53.85.mp4

=== Round 95 | Learn 262144 steps (Total trained: 24641536) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1096     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 24649728 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 903         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 24657920    |
| train/                |             |
|    approx_kl          | 0.011684719 |
|    entropy_loss       | -1.74       |
|    explained_variance | 0.98        |
|    learning_rate      | 0.0001      |
|    loss               | 0.243       |
|    mean_step_reward   | 0.

Saved checkpoint: ./runs_smw/checkpoints/Enc5_94.zip
[EVAL] Mean Return: 528.300, Best Return: 532.966
Saved video to ./runs_smw/videos/Enc5/Enc5_94_528.30.mp4

=== Round 96 | Learn 262144 steps (Total trained: 24903680) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1103     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 24911872 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 903         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 24920064    |
| train/                |             |
|    approx_kl          | 0.012420714 |
|    entropy_loss       | -1.75       |
|    explained_variance | 0.979       |
|    learning_rate      | 0.0001      |
|    loss               | 0.599       |
|    mean_step_reward   |

Saved checkpoint: ./runs_smw/checkpoints/Enc5_95.zip
[EVAL] Mean Return: 533.717, Best Return: 540.383
Saved video to ./runs_smw/videos/Enc5/Enc5_95_533.72.mp4

=== Round 97 | Learn 262144 steps (Total trained: 25165824) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1122     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 25174016 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 898         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 25182208    |
| train/                |             |
|    approx_kl          | 0.011162633 |
|    entropy_loss       | -1.71       |
|    explained_variance | 0.982       |
|    learning_rate      | 0.0001      |
|    loss               | 0.272       |
|    mean_step_reward   |

Saved checkpoint: ./runs_smw/checkpoints/Enc5_96.zip
[EVAL] Mean Return: 406.007, Best Return: 411.341
Saved video to ./runs_smw/videos/Enc5/Enc5_96_406.01.mp4

=== Round 98 | Learn 262144 steps (Total trained: 25427968) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1109     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 25436160 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 904         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 25444352    |
| train/                |             |
|    approx_kl          | 0.011430012 |
|    entropy_loss       | -1.72       |
|    explained_variance | 0.976       |
|    learning_rate      | 0.0001      |
|    loss               | 0.543       |
|    mean_step_reward   |

Saved checkpoint: ./runs_smw/checkpoints/Enc5_97.zip
[EVAL] Mean Return: 500.108, Best Return: 504.775
Saved video to ./runs_smw/videos/Enc5/Enc5_97_500.11.mp4

=== Round 99 | Learn 262144 steps (Total trained: 25690112) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1129     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 25698304 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 920         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 25706496    |
| train/                |             |
|    approx_kl          | 0.013569259 |
|    entropy_loss       | -1.75       |
|    explained_variance | 0.933       |
|    learning_rate      | 0.0001      |
|    loss               | 0.694       |
|    mean_step_reward   |

Saved checkpoint: ./runs_smw/checkpoints/Enc5_98.zip
[EVAL] Mean Return: 406.453, Best Return: 410.453
Saved video to ./runs_smw/videos/Enc5/Enc5_98_406.45.mp4

=== Round 100 | Learn 262144 steps (Total trained: 25952256) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1158     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 25960448 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 925         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 25968640    |
| train/                |             |
|    approx_kl          | 0.013668383 |
|    entropy_loss       | -1.68       |
|    explained_variance | 0.988       |
|    learning_rate      | 0.0001      |
|    loss               | 0.403       |
|    mean_step_reward   

Saved checkpoint: ./runs_smw/checkpoints/Enc5_99.zip
[EVAL] Mean Return: 529.229, Best Return: 534.563
Saved video to ./runs_smw/videos/Enc5/Enc5_99_529.23.mp4

=== Round 101 | Learn 262144 steps (Total trained: 26214400) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1136     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 26222592 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 935         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 26230784    |
| train/                |             |
|    approx_kl          | 0.017072462 |
|    entropy_loss       | -1.59       |
|    explained_variance | 0.993       |
|    learning_rate      | 0.0001      |
|    loss               | 0.118       |
|    mean_step_reward   

Saved checkpoint: ./runs_smw/checkpoints/Enc5_100.zip
[EVAL] Mean Return: 407.943, Best Return: 413.276
Saved video to ./runs_smw/videos/Enc5/Enc5_100_407.94.mp4

=== Round 102 | Learn 262144 steps (Total trained: 26476544) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1078     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 26484736 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 910         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 26492928    |
| train/                |             |
|    approx_kl          | 0.012932764 |
|    entropy_loss       | -1.66       |
|    explained_variance | 0.984       |
|    learning_rate      | 0.0001      |
|    loss               | 0.328       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_101.zip
[EVAL] Mean Return: 526.771, Best Return: 531.438
Saved video to ./runs_smw/videos/Enc5/Enc5_101_526.77.mp4

=== Round 103 | Learn 262144 steps (Total trained: 26738688) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1188     |
|    iterations      | 1        |
|    time_elapsed    | 6        |
|    total_timesteps | 26746880 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 918         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 26755072    |
| train/                |             |
|    approx_kl          | 0.019605512 |
|    entropy_loss       | -1.64       |
|    explained_variance | 0.989       |
|    learning_rate      | 0.0001      |
|    loss               | 0.625       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_102.zip
[EVAL] Mean Return: 528.878, Best Return: 536.211
Saved video to ./runs_smw/videos/Enc5/Enc5_102_528.88.mp4

=== Round 104 | Learn 262144 steps (Total trained: 27000832) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1140     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 27009024 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 925         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 27017216    |
| train/                |             |
|    approx_kl          | 0.013358331 |
|    entropy_loss       | -1.64       |
|    explained_variance | 0.932       |
|    learning_rate      | 0.0001      |
|    loss               | 0.913       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_103.zip
[EVAL] Mean Return: 406.766, Best Return: 411.433
Saved video to ./runs_smw/videos/Enc5/Enc5_103_406.77.mp4

=== Round 105 | Learn 262144 steps (Total trained: 27262976) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1181     |
|    iterations      | 1        |
|    time_elapsed    | 6        |
|    total_timesteps | 27271168 |
---------------------------------
----------------------------------------
| time/                 |              |
|    fps                | 920          |
|    iterations         | 2            |
|    time_elapsed       | 17           |
|    total_timesteps    | 27279360     |
| train/                |              |
|    approx_kl          | 0.0101604825 |
|    entropy_loss       | -1.68        |
|    explained_variance | 0.981        |
|    learning_rate      | 0.0001       |
|    loss               | 0.457        |
|    mean_

Saved checkpoint: ./runs_smw/checkpoints/Enc5_104.zip
[EVAL] Mean Return: 411.574, Best Return: 416.907
Saved video to ./runs_smw/videos/Enc5/Enc5_104_411.57.mp4

=== Round 106 | Learn 262144 steps (Total trained: 27525120) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1151     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 27533312 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 932         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 27541504    |
| train/                |             |
|    approx_kl          | 0.009609401 |
|    entropy_loss       | -1.65       |
|    explained_variance | 0.981       |
|    learning_rate      | 0.0001      |
|    loss               | 0.322       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_105.zip
[EVAL] Mean Return: 528.445, Best Return: 536.445
Saved video to ./runs_smw/videos/Enc5/Enc5_105_528.45.mp4

=== Round 107 | Learn 262144 steps (Total trained: 27787264) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1145     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 27795456 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 927         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 27803648    |
| train/                |             |
|    approx_kl          | 0.016413474 |
|    entropy_loss       | -1.63       |
|    explained_variance | 0.963       |
|    learning_rate      | 0.0001      |
|    loss               | 0.959       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_106.zip
[EVAL] Mean Return: 530.047, Best Return: 537.380
Saved video to ./runs_smw/videos/Enc5/Enc5_106_530.05.mp4

=== Round 108 | Learn 262144 steps (Total trained: 28049408) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1130     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 28057600 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 911         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 28065792    |
| train/                |             |
|    approx_kl          | 0.014688649 |
|    entropy_loss       | -1.69       |
|    explained_variance | 0.986       |
|    learning_rate      | 0.0001      |
|    loss               | 1.06        |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_107.zip
[EVAL] Mean Return: 529.899, Best Return: 536.565
Saved video to ./runs_smw/videos/Enc5/Enc5_107_529.90.mp4

=== Round 109 | Learn 262144 steps (Total trained: 28311552) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1128     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 28319744 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 923         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 28327936    |
| train/                |             |
|    approx_kl          | 0.011783546 |
|    entropy_loss       | -1.73       |
|    explained_variance | 0.983       |
|    learning_rate      | 0.0001      |
|    loss               | 0.495       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_108.zip
[EVAL] Mean Return: 530.938, Best Return: 535.605
Saved video to ./runs_smw/videos/Enc5/Enc5_108_530.94.mp4

=== Round 110 | Learn 262144 steps (Total trained: 28573696) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1124     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 28581888 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 910         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 28590080    |
| train/                |             |
|    approx_kl          | 0.017042223 |
|    entropy_loss       | -1.62       |
|    explained_variance | 0.992       |
|    learning_rate      | 0.0001      |
|    loss               | 0.298       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_109.zip
[EVAL] Mean Return: 534.036, Best Return: 540.703
Saved video to ./runs_smw/videos/Enc5/Enc5_109_534.04.mp4

=== Round 111 | Learn 262144 steps (Total trained: 28835840) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1080     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 28844032 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 906         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 28852224    |
| train/                |             |
|    approx_kl          | 0.014124439 |
|    entropy_loss       | -1.66       |
|    explained_variance | 0.938       |
|    learning_rate      | 0.0001      |
|    loss               | 0.402       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_110.zip
[EVAL] Mean Return: 407.349, Best Return: 413.349
Saved video to ./runs_smw/videos/Enc5/Enc5_110_407.35.mp4

=== Round 112 | Learn 262144 steps (Total trained: 29097984) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1111     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 29106176 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 921         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 29114368    |
| train/                |             |
|    approx_kl          | 0.013392907 |
|    entropy_loss       | -1.65       |
|    explained_variance | 0.99        |
|    learning_rate      | 0.0001      |
|    loss               | 0.19        |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_111.zip
[EVAL] Mean Return: 408.082, Best Return: 413.415
Saved video to ./runs_smw/videos/Enc5/Enc5_111_408.08.mp4

=== Round 113 | Learn 262144 steps (Total trained: 29360128) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1153     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 29368320 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 922         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 29376512    |
| train/                |             |
|    approx_kl          | 0.019176546 |
|    entropy_loss       | -1.66       |
|    explained_variance | 0.986       |
|    learning_rate      | 0.0001      |
|    loss               | 0.949       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_112.zip
[EVAL] Mean Return: 532.450, Best Return: 539.117
Saved video to ./runs_smw/videos/Enc5/Enc5_112_532.45.mp4

=== Round 114 | Learn 262144 steps (Total trained: 29622272) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1143     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 29630464 |
---------------------------------
----------------------------------------
| time/                 |              |
|    fps                | 920          |
|    iterations         | 2            |
|    time_elapsed       | 17           |
|    total_timesteps    | 29638656     |
| train/                |              |
|    approx_kl          | 0.0100500975 |
|    entropy_loss       | -1.71        |
|    explained_variance | 0.987        |
|    learning_rate      | 0.0001       |
|    loss               | 0.622        |
|    mean_

Saved checkpoint: ./runs_smw/checkpoints/Enc5_113.zip
[EVAL] Mean Return: 520.137, Best Return: 526.803
Saved video to ./runs_smw/videos/Enc5/Enc5_113_520.14.mp4

=== Round 115 | Learn 262144 steps (Total trained: 29884416) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1159     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 29892608 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 916         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 29900800    |
| train/                |             |
|    approx_kl          | 0.010862414 |
|    entropy_loss       | -1.68       |
|    explained_variance | 0.978       |
|    learning_rate      | 0.0001      |
|    loss               | 0.451       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_114.zip
[EVAL] Mean Return: 37.227, Best Return: 37.894
Saved video to ./runs_smw/videos/Enc5/Enc5_114_37.23.mp4

=== Round 116 | Learn 262144 steps (Total trained: 30146560) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1129     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 30154752 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 918         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 30162944    |
| train/                |             |
|    approx_kl          | 0.016997477 |
|    entropy_loss       | -1.64       |
|    explained_variance | 0.988       |
|    learning_rate      | 0.0001      |
|    loss               | 0.985       |
|    mean_step_reward   |

Saved checkpoint: ./runs_smw/checkpoints/Enc5_115.zip
[EVAL] Mean Return: 531.392, Best Return: 538.059
Saved video to ./runs_smw/videos/Enc5/Enc5_115_531.39.mp4

=== Round 117 | Learn 262144 steps (Total trained: 30408704) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1107     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 30416896 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 905         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 30425088    |
| train/                |             |
|    approx_kl          | 0.010849426 |
|    entropy_loss       | -1.64       |
|    explained_variance | 0.991       |
|    learning_rate      | 0.0001      |
|    loss               | 0.643       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_116.zip
[EVAL] Mean Return: 531.408, Best Return: 538.075
Saved video to ./runs_smw/videos/Enc5/Enc5_116_531.41.mp4

=== Round 118 | Learn 262144 steps (Total trained: 30670848) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1171     |
|    iterations      | 1        |
|    time_elapsed    | 6        |
|    total_timesteps | 30679040 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 916         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 30687232    |
| train/                |             |
|    approx_kl          | 0.012998598 |
|    entropy_loss       | -1.71       |
|    explained_variance | 0.984       |
|    learning_rate      | 0.0001      |
|    loss               | 0.777       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_117.zip
[EVAL] Mean Return: 495.911, Best Return: 502.578
Saved video to ./runs_smw/videos/Enc5/Enc5_117_495.91.mp4

=== Round 119 | Learn 262144 steps (Total trained: 30932992) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1091     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 30941184 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 900         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 30949376    |
| train/                |             |
|    approx_kl          | 0.020827344 |
|    entropy_loss       | -1.65       |
|    explained_variance | 0.962       |
|    learning_rate      | 0.0001      |
|    loss               | 1.92        |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_118.zip
[EVAL] Mean Return: 531.084, Best Return: 537.750
Saved video to ./runs_smw/videos/Enc5/Enc5_118_531.08.mp4

=== Round 120 | Learn 262144 steps (Total trained: 31195136) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1070     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 31203328 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 890         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 31211520    |
| train/                |             |
|    approx_kl          | 0.013148207 |
|    entropy_loss       | -1.68       |
|    explained_variance | 0.987       |
|    learning_rate      | 0.0001      |
|    loss               | 0.648       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_119.zip
[EVAL] Mean Return: 531.731, Best Return: 538.398
Saved video to ./runs_smw/videos/Enc5/Enc5_119_531.73.mp4

=== Round 121 | Learn 262144 steps (Total trained: 31457280) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1124     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 31465472 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 911         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 31473664    |
| train/                |             |
|    approx_kl          | 0.020699717 |
|    entropy_loss       | -1.61       |
|    explained_variance | 0.962       |
|    learning_rate      | 0.0001      |
|    loss               | 0.607       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_120.zip
[EVAL] Mean Return: 532.978, Best Return: 539.645
Saved video to ./runs_smw/videos/Enc5/Enc5_120_532.98.mp4

=== Round 122 | Learn 262144 steps (Total trained: 31719424) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1126     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 31727616 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 919         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 31735808    |
| train/                |             |
|    approx_kl          | 0.015658688 |
|    entropy_loss       | -1.61       |
|    explained_variance | 0.984       |
|    learning_rate      | 0.0001      |
|    loss               | 0.387       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_121.zip
[EVAL] Mean Return: 531.892, Best Return: 538.559
Saved video to ./runs_smw/videos/Enc5/Enc5_121_531.89.mp4

=== Round 123 | Learn 262144 steps (Total trained: 31981568) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1143     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 31989760 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 940         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 31997952    |
| train/                |             |
|    approx_kl          | 0.013148665 |
|    entropy_loss       | -1.64       |
|    explained_variance | 0.974       |
|    learning_rate      | 0.0001      |
|    loss               | 0.293       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_122.zip
[EVAL] Mean Return: 531.487, Best Return: 538.487
Saved video to ./runs_smw/videos/Enc5/Enc5_122_531.49.mp4

=== Round 124 | Learn 262144 steps (Total trained: 32243712) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1177     |
|    iterations      | 1        |
|    time_elapsed    | 6        |
|    total_timesteps | 32251904 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 933         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 32260096    |
| train/                |             |
|    approx_kl          | 0.012008362 |
|    entropy_loss       | -1.65       |
|    explained_variance | 0.944       |
|    learning_rate      | 0.0001      |
|    loss               | 0.793       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_123.zip
[EVAL] Mean Return: 526.721, Best Return: 533.387
Saved video to ./runs_smw/videos/Enc5/Enc5_123_526.72.mp4

=== Round 125 | Learn 262144 steps (Total trained: 32505856) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1134     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 32514048 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 920         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 32522240    |
| train/                |             |
|    approx_kl          | 0.011923203 |
|    entropy_loss       | -1.7        |
|    explained_variance | 0.983       |
|    learning_rate      | 0.0001      |
|    loss               | 0.539       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_124.zip
[EVAL] Mean Return: 544.593, Best Return: 545.927
Saved video to ./runs_smw/videos/Enc5/Enc5_124_544.59.mp4

=== Round 126 | Learn 262144 steps (Total trained: 32768000) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1123     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 32776192 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 912         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 32784384    |
| train/                |             |
|    approx_kl          | 0.012412662 |
|    entropy_loss       | -1.67       |
|    explained_variance | 0.982       |
|    learning_rate      | 0.0001      |
|    loss               | 0.63        |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_125.zip
[EVAL] Mean Return: 519.875, Best Return: 525.875
Saved video to ./runs_smw/videos/Enc5/Enc5_125_519.87.mp4

=== Round 127 | Learn 262144 steps (Total trained: 33030144) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1116     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 33038336 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 914         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 33046528    |
| train/                |             |
|    approx_kl          | 0.012970606 |
|    entropy_loss       | -1.66       |
|    explained_variance | 0.982       |
|    learning_rate      | 0.0001      |
|    loss               | 0.86        |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_126.zip
[EVAL] Mean Return: 531.538, Best Return: 537.538
Saved video to ./runs_smw/videos/Enc5/Enc5_126_531.54.mp4

=== Round 128 | Learn 262144 steps (Total trained: 33292288) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1119     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 33300480 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 908         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 33308672    |
| train/                |             |
|    approx_kl          | 0.013227928 |
|    entropy_loss       | -1.65       |
|    explained_variance | 0.988       |
|    learning_rate      | 0.0001      |
|    loss               | 1.29        |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_127.zip
[EVAL] Mean Return: 533.514, Best Return: 541.514
Saved video to ./runs_smw/videos/Enc5/Enc5_127_533.51.mp4

=== Round 129 | Learn 262144 steps (Total trained: 33554432) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1170     |
|    iterations      | 1        |
|    time_elapsed    | 6        |
|    total_timesteps | 33562624 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 938         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 33570816    |
| train/                |             |
|    approx_kl          | 0.013507951 |
|    entropy_loss       | -1.72       |
|    explained_variance | 0.968       |
|    learning_rate      | 0.0001      |
|    loss               | 0.314       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_128.zip
[EVAL] Mean Return: 534.911, Best Return: 541.578
Saved video to ./runs_smw/videos/Enc5/Enc5_128_534.91.mp4

=== Round 130 | Learn 262144 steps (Total trained: 33816576) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1172     |
|    iterations      | 1        |
|    time_elapsed    | 6        |
|    total_timesteps | 33824768 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 916         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 33832960    |
| train/                |             |
|    approx_kl          | 0.016688399 |
|    entropy_loss       | -1.59       |
|    explained_variance | 0.992       |
|    learning_rate      | 0.0001      |
|    loss               | 0.78        |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_129.zip
[EVAL] Mean Return: 538.433, Best Return: 545.100
Saved video to ./runs_smw/videos/Enc5/Enc5_129_538.43.mp4

=== Round 131 | Learn 262144 steps (Total trained: 34078720) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1154     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 34086912 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 919         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 34095104    |
| train/                |             |
|    approx_kl          | 0.011008648 |
|    entropy_loss       | -1.65       |
|    explained_variance | 0.986       |
|    learning_rate      | 0.0001      |
|    loss               | 0.425       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_130.zip
[EVAL] Mean Return: 531.754, Best Return: 537.754
Saved video to ./runs_smw/videos/Enc5/Enc5_130_531.75.mp4

=== Round 132 | Learn 262144 steps (Total trained: 34340864) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1135     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 34349056 |
---------------------------------
--------------------------------------
| time/                 |            |
|    fps                | 933        |
|    iterations         | 2          |
|    time_elapsed       | 17         |
|    total_timesteps    | 34357248   |
| train/                |            |
|    approx_kl          | 0.01857768 |
|    entropy_loss       | -1.63      |
|    explained_variance | 0.944      |
|    learning_rate      | 0.0001     |
|    loss               | 1.01       |
|    mean_step_reward   | 0.406718

Saved checkpoint: ./runs_smw/checkpoints/Enc5_131.zip
[EVAL] Mean Return: 538.642, Best Return: 545.642
Saved video to ./runs_smw/videos/Enc5/Enc5_131_538.64.mp4

=== Round 133 | Learn 262144 steps (Total trained: 34603008) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1115     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 34611200 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 911         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 34619392    |
| train/                |             |
|    approx_kl          | 0.012087338 |
|    entropy_loss       | -1.65       |
|    explained_variance | 0.99        |
|    learning_rate      | 0.0001      |
|    loss               | 0.951       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_132.zip
[EVAL] Mean Return: 542.525, Best Return: 549.192
Saved video to ./runs_smw/videos/Enc5/Enc5_132_542.52.mp4

=== Round 134 | Learn 262144 steps (Total trained: 34865152) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1157     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 34873344 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 936         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 34881536    |
| train/                |             |
|    approx_kl          | 0.016476639 |
|    entropy_loss       | -1.63       |
|    explained_variance | 0.987       |
|    learning_rate      | 0.0001      |
|    loss               | 1.54        |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_133.zip
[EVAL] Mean Return: 534.582, Best Return: 541.249
Saved video to ./runs_smw/videos/Enc5/Enc5_133_534.58.mp4

=== Round 135 | Learn 262144 steps (Total trained: 35127296) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1115     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 35135488 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 904         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 35143680    |
| train/                |             |
|    approx_kl          | 0.014355024 |
|    entropy_loss       | -1.67       |
|    explained_variance | 0.981       |
|    learning_rate      | 0.0001      |
|    loss               | 1.53        |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_134.zip
[EVAL] Mean Return: 537.730, Best Return: 544.396
Saved video to ./runs_smw/videos/Enc5/Enc5_134_537.73.mp4

=== Round 136 | Learn 262144 steps (Total trained: 35389440) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1100     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 35397632 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 906         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 35405824    |
| train/                |             |
|    approx_kl          | 0.015085512 |
|    entropy_loss       | -1.66       |
|    explained_variance | 0.987       |
|    learning_rate      | 0.0001      |
|    loss               | 0.248       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_135.zip
[EVAL] Mean Return: 536.655, Best Return: 543.988
Saved video to ./runs_smw/videos/Enc5/Enc5_135_536.65.mp4

=== Round 137 | Learn 262144 steps (Total trained: 35651584) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1073     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 35659776 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 897         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 35667968    |
| train/                |             |
|    approx_kl          | 0.012173103 |
|    entropy_loss       | -1.73       |
|    explained_variance | 0.981       |
|    learning_rate      | 0.0001      |
|    loss               | 1.34        |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_136.zip
[EVAL] Mean Return: 540.510, Best Return: 547.177
Saved video to ./runs_smw/videos/Enc5/Enc5_136_540.51.mp4

=== Round 138 | Learn 262144 steps (Total trained: 35913728) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1158     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 35921920 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 928         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 35930112    |
| train/                |             |
|    approx_kl          | 0.013451444 |
|    entropy_loss       | -1.67       |
|    explained_variance | 0.945       |
|    learning_rate      | 0.0001      |
|    loss               | 1.41        |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_137.zip
[EVAL] Mean Return: 544.316, Best Return: 550.982
Saved video to ./runs_smw/videos/Enc5/Enc5_137_544.32.mp4

=== Round 139 | Learn 262144 steps (Total trained: 36175872) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1126     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 36184064 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 920         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 36192256    |
| train/                |             |
|    approx_kl          | 0.015157958 |
|    entropy_loss       | -1.67       |
|    explained_variance | 0.946       |
|    learning_rate      | 0.0001      |
|    loss               | 1.25        |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_138.zip
[EVAL] Mean Return: 537.581, Best Return: 544.248
Saved video to ./runs_smw/videos/Enc5/Enc5_138_537.58.mp4

=== Round 140 | Learn 262144 steps (Total trained: 36438016) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1143     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 36446208 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 922         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 36454400    |
| train/                |             |
|    approx_kl          | 0.012119036 |
|    entropy_loss       | -1.61       |
|    explained_variance | 0.973       |
|    learning_rate      | 0.0001      |
|    loss               | 0.543       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_139.zip
[EVAL] Mean Return: 538.352, Best Return: 545.018
Saved video to ./runs_smw/videos/Enc5/Enc5_139_538.35.mp4

=== Round 141 | Learn 262144 steps (Total trained: 36700160) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1155     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 36708352 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 921         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 36716544    |
| train/                |             |
|    approx_kl          | 0.024455197 |
|    entropy_loss       | -1.63       |
|    explained_variance | 0.989       |
|    learning_rate      | 0.0001      |
|    loss               | 0.44        |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_140.zip
[EVAL] Mean Return: 490.212, Best Return: 491.545
Saved video to ./runs_smw/videos/Enc5/Enc5_140_490.21.mp4

=== Round 142 | Learn 262144 steps (Total trained: 36962304) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1153     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 36970496 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 925         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 36978688    |
| train/                |             |
|    approx_kl          | 0.012868541 |
|    entropy_loss       | -1.62       |
|    explained_variance | 0.985       |
|    learning_rate      | 0.0001      |
|    loss               | 1.52        |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_141.zip
[EVAL] Mean Return: 537.045, Best Return: 543.712
Saved video to ./runs_smw/videos/Enc5/Enc5_141_537.05.mp4

=== Round 143 | Learn 262144 steps (Total trained: 37224448) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1136     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 37232640 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 917         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 37240832    |
| train/                |             |
|    approx_kl          | 0.019590631 |
|    entropy_loss       | -1.56       |
|    explained_variance | 0.99        |
|    learning_rate      | 0.0001      |
|    loss               | 0.201       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_142.zip
[EVAL] Mean Return: 540.303, Best Return: 546.969
Saved video to ./runs_smw/videos/Enc5/Enc5_142_540.30.mp4

=== Round 144 | Learn 262144 steps (Total trained: 37486592) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1120     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 37494784 |
---------------------------------
--------------------------------------
| time/                 |            |
|    fps                | 904        |
|    iterations         | 2          |
|    time_elapsed       | 18         |
|    total_timesteps    | 37502976   |
| train/                |            |
|    approx_kl          | 0.00969067 |
|    entropy_loss       | -1.62      |
|    explained_variance | 0.986      |
|    learning_rate      | 0.0001     |
|    loss               | 0.93       |
|    mean_step_reward   | 0.378637

Saved checkpoint: ./runs_smw/checkpoints/Enc5_143.zip
[EVAL] Mean Return: 542.245, Best Return: 548.245
Saved video to ./runs_smw/videos/Enc5/Enc5_143_542.24.mp4

=== Round 145 | Learn 262144 steps (Total trained: 37748736) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1157     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 37756928 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 938         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 37765120    |
| train/                |             |
|    approx_kl          | 0.018775009 |
|    entropy_loss       | -1.61       |
|    explained_variance | 0.987       |
|    learning_rate      | 0.0001      |
|    loss               | 0.422       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_144.zip
[EVAL] Mean Return: 542.501, Best Return: 549.168
Saved video to ./runs_smw/videos/Enc5/Enc5_144_542.50.mp4

=== Round 146 | Learn 262144 steps (Total trained: 38010880) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1122     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 38019072 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 908         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 38027264    |
| train/                |             |
|    approx_kl          | 0.013545315 |
|    entropy_loss       | -1.64       |
|    explained_variance | 0.987       |
|    learning_rate      | 0.0001      |
|    loss               | 0.557       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_145.zip
[EVAL] Mean Return: 542.272, Best Return: 549.606
Saved video to ./runs_smw/videos/Enc5/Enc5_145_542.27.mp4

=== Round 147 | Learn 262144 steps (Total trained: 38273024) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1098     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 38281216 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 900         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 38289408    |
| train/                |             |
|    approx_kl          | 0.011799587 |
|    entropy_loss       | -1.66       |
|    explained_variance | 0.976       |
|    learning_rate      | 0.0001      |
|    loss               | 0.864       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_146.zip
[EVAL] Mean Return: 539.625, Best Return: 546.958
Saved video to ./runs_smw/videos/Enc5/Enc5_146_539.62.mp4

=== Round 148 | Learn 262144 steps (Total trained: 38535168) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1129     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 38543360 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 904         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 38551552    |
| train/                |             |
|    approx_kl          | 0.013919942 |
|    entropy_loss       | -1.61       |
|    explained_variance | 0.988       |
|    learning_rate      | 0.0001      |
|    loss               | 1.12        |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_147.zip
[EVAL] Mean Return: 547.719, Best Return: 554.386
Saved video to ./runs_smw/videos/Enc5/Enc5_147_547.72.mp4

=== Round 149 | Learn 262144 steps (Total trained: 38797312) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1130     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 38805504 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 908         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 38813696    |
| train/                |             |
|    approx_kl          | 0.015533638 |
|    entropy_loss       | -1.69       |
|    explained_variance | 0.978       |
|    learning_rate      | 0.0001      |
|    loss               | 1.17        |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_148.zip
[EVAL] Mean Return: 542.185, Best Return: 548.852
Saved video to ./runs_smw/videos/Enc5/Enc5_148_542.19.mp4

=== Round 150 | Learn 262144 steps (Total trained: 39059456) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1114     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 39067648 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 885         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 39075840    |
| train/                |             |
|    approx_kl          | 0.011191668 |
|    entropy_loss       | -1.69       |
|    explained_variance | 0.953       |
|    learning_rate      | 0.0001      |
|    loss               | 0.941       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_149.zip
[EVAL] Mean Return: 535.168, Best Return: 543.168
Saved video to ./runs_smw/videos/Enc5/Enc5_149_535.17.mp4

=== Round 151 | Learn 262144 steps (Total trained: 39321600) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1156     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 39329792 |
---------------------------------
--------------------------------------
| time/                 |            |
|    fps                | 928        |
|    iterations         | 2          |
|    time_elapsed       | 17         |
|    total_timesteps    | 39337984   |
| train/                |            |
|    approx_kl          | 0.01609219 |
|    entropy_loss       | -1.67      |
|    explained_variance | 0.992      |
|    learning_rate      | 0.0001     |
|    loss               | 0.207      |
|    mean_step_reward   | 0.411688

Saved checkpoint: ./runs_smw/checkpoints/Enc5_150.zip
[EVAL] Mean Return: 542.874, Best Return: 549.541
Saved video to ./runs_smw/videos/Enc5/Enc5_150_542.87.mp4

=== Round 152 | Learn 262144 steps (Total trained: 39583744) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1161     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 39591936 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 915         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 39600128    |
| train/                |             |
|    approx_kl          | 0.026770497 |
|    entropy_loss       | -1.58       |
|    explained_variance | 0.995       |
|    learning_rate      | 0.0001      |
|    loss               | 0.112       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_151.zip
[EVAL] Mean Return: 543.280, Best Return: 548.613
Saved video to ./runs_smw/videos/Enc5/Enc5_151_543.28.mp4

=== Round 153 | Learn 262144 steps (Total trained: 39845888) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1106     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 39854080 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 896         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 39862272    |
| train/                |             |
|    approx_kl          | 0.012563874 |
|    entropy_loss       | -1.61       |
|    explained_variance | 0.987       |
|    learning_rate      | 0.0001      |
|    loss               | 0.96        |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_152.zip
[EVAL] Mean Return: 536.492, Best Return: 543.159
Saved video to ./runs_smw/videos/Enc5/Enc5_152_536.49.mp4

=== Round 154 | Learn 262144 steps (Total trained: 40108032) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1098     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 40116224 |
---------------------------------
--------------------------------------
| time/                 |            |
|    fps                | 922        |
|    iterations         | 2          |
|    time_elapsed       | 17         |
|    total_timesteps    | 40124416   |
| train/                |            |
|    approx_kl          | 0.01371626 |
|    entropy_loss       | -1.6       |
|    explained_variance | 0.984      |
|    learning_rate      | 0.0001     |
|    loss               | 0.677      |
|    mean_step_reward   | 0.376587

Saved checkpoint: ./runs_smw/checkpoints/Enc5_153.zip
[EVAL] Mean Return: 541.615, Best Return: 548.949
Saved video to ./runs_smw/videos/Enc5/Enc5_153_541.62.mp4

=== Round 155 | Learn 262144 steps (Total trained: 40370176) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1124     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 40378368 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 920         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 40386560    |
| train/                |             |
|    approx_kl          | 0.015083667 |
|    entropy_loss       | -1.6        |
|    explained_variance | 0.99        |
|    learning_rate      | 0.0001      |
|    loss               | 0.352       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_154.zip
[EVAL] Mean Return: 543.304, Best Return: 549.970
Saved video to ./runs_smw/videos/Enc5/Enc5_154_543.30.mp4

=== Round 156 | Learn 262144 steps (Total trained: 40632320) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1088     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 40640512 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 901         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 40648704    |
| train/                |             |
|    approx_kl          | 0.013712841 |
|    entropy_loss       | -1.59       |
|    explained_variance | 0.965       |
|    learning_rate      | 0.0001      |
|    loss               | 1.12        |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_155.zip
[EVAL] Mean Return: 535.866, Best Return: 543.199
Saved video to ./runs_smw/videos/Enc5/Enc5_155_535.87.mp4

=== Round 157 | Learn 262144 steps (Total trained: 40894464) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1117     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 40902656 |
---------------------------------
----------------------------------------
| time/                 |              |
|    fps                | 917          |
|    iterations         | 2            |
|    time_elapsed       | 17           |
|    total_timesteps    | 40910848     |
| train/                |              |
|    approx_kl          | 0.0145466495 |
|    entropy_loss       | -1.59        |
|    explained_variance | 0.971        |
|    learning_rate      | 0.0001       |
|    loss               | 0.94         |
|    mean_

Saved checkpoint: ./runs_smw/checkpoints/Enc5_156.zip
[EVAL] Mean Return: 538.316, Best Return: 544.983
Saved video to ./runs_smw/videos/Enc5/Enc5_156_538.32.mp4

=== Round 158 | Learn 262144 steps (Total trained: 41156608) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1143     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 41164800 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 937         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 41172992    |
| train/                |             |
|    approx_kl          | 0.016647315 |
|    entropy_loss       | -1.62       |
|    explained_variance | 0.988       |
|    learning_rate      | 0.0001      |
|    loss               | 1.1         |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_157.zip
[EVAL] Mean Return: 543.519, Best Return: 550.185
Saved video to ./runs_smw/videos/Enc5/Enc5_157_543.52.mp4

=== Round 159 | Learn 262144 steps (Total trained: 41418752) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1121     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 41426944 |
---------------------------------
--------------------------------------
| time/                 |            |
|    fps                | 934        |
|    iterations         | 2          |
|    time_elapsed       | 17         |
|    total_timesteps    | 41435136   |
| train/                |            |
|    approx_kl          | 0.01195449 |
|    entropy_loss       | -1.61      |
|    explained_variance | 0.99       |
|    learning_rate      | 0.0001     |
|    loss               | 0.377      |
|    mean_step_reward   | 0.444448

Saved checkpoint: ./runs_smw/checkpoints/Enc5_158.zip
[EVAL] Mean Return: 536.749, Best Return: 543.415
Saved video to ./runs_smw/videos/Enc5/Enc5_158_536.75.mp4

=== Round 160 | Learn 262144 steps (Total trained: 41680896) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1117     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 41689088 |
---------------------------------
--------------------------------------
| time/                 |            |
|    fps                | 949        |
|    iterations         | 2          |
|    time_elapsed       | 17         |
|    total_timesteps    | 41697280   |
| train/                |            |
|    approx_kl          | 0.01259605 |
|    entropy_loss       | -1.63      |
|    explained_variance | 0.991      |
|    learning_rate      | 0.0001     |
|    loss               | 0.542      |
|    mean_step_reward   | 0.422787

Saved checkpoint: ./runs_smw/checkpoints/Enc5_159.zip
[EVAL] Mean Return: 420.143, Best Return: 424.810
Saved video to ./runs_smw/videos/Enc5/Enc5_159_420.14.mp4

=== Round 161 | Learn 262144 steps (Total trained: 41943040) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1108     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 41951232 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 903         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 41959424    |
| train/                |             |
|    approx_kl          | 0.014339831 |
|    entropy_loss       | -1.66       |
|    explained_variance | 0.984       |
|    learning_rate      | 0.0001      |
|    loss               | 0.225       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_160.zip
[EVAL] Mean Return: 542.855, Best Return: 549.521
Saved video to ./runs_smw/videos/Enc5/Enc5_160_542.85.mp4

=== Round 162 | Learn 262144 steps (Total trained: 42205184) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1143     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 42213376 |
---------------------------------
----------------------------------------
| time/                 |              |
|    fps                | 922          |
|    iterations         | 2            |
|    time_elapsed       | 17           |
|    total_timesteps    | 42221568     |
| train/                |              |
|    approx_kl          | 0.0150622325 |
|    entropy_loss       | -1.61        |
|    explained_variance | 0.989        |
|    learning_rate      | 0.0001       |
|    loss               | 0.674        |
|    mean_

Saved checkpoint: ./runs_smw/checkpoints/Enc5_161.zip
[EVAL] Mean Return: 539.502, Best Return: 546.835
Saved video to ./runs_smw/videos/Enc5/Enc5_161_539.50.mp4

=== Round 163 | Learn 262144 steps (Total trained: 42467328) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1107     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 42475520 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 892         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 42483712    |
| train/                |             |
|    approx_kl          | 0.015687447 |
|    entropy_loss       | -1.6        |
|    explained_variance | 0.995       |
|    learning_rate      | 0.0001      |
|    loss               | 0.158       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_162.zip
[EVAL] Mean Return: 543.879, Best Return: 551.213
Saved video to ./runs_smw/videos/Enc5/Enc5_162_543.88.mp4

=== Round 164 | Learn 262144 steps (Total trained: 42729472) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1101     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 42737664 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 895         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 42745856    |
| train/                |             |
|    approx_kl          | 0.012835918 |
|    entropy_loss       | -1.61       |
|    explained_variance | 0.99        |
|    learning_rate      | 0.0001      |
|    loss               | 0.42        |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_163.zip
[EVAL] Mean Return: 542.999, Best Return: 549.666
Saved video to ./runs_smw/videos/Enc5/Enc5_163_543.00.mp4

=== Round 165 | Learn 262144 steps (Total trained: 42991616) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1139     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 42999808 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 914         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 43008000    |
| train/                |             |
|    approx_kl          | 0.020244198 |
|    entropy_loss       | -1.65       |
|    explained_variance | 0.955       |
|    learning_rate      | 0.0001      |
|    loss               | 1.14        |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_164.zip
[EVAL] Mean Return: 542.791, Best Return: 549.458
Saved video to ./runs_smw/videos/Enc5/Enc5_164_542.79.mp4

=== Round 166 | Learn 262144 steps (Total trained: 43253760) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1086     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 43261952 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 908         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 43270144    |
| train/                |             |
|    approx_kl          | 0.013504708 |
|    entropy_loss       | -1.62       |
|    explained_variance | 0.986       |
|    learning_rate      | 0.0001      |
|    loss               | 0.557       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_165.zip
[EVAL] Mean Return: 543.193, Best Return: 550.527
Saved video to ./runs_smw/videos/Enc5/Enc5_165_543.19.mp4

=== Round 167 | Learn 262144 steps (Total trained: 43515904) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1161     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 43524096 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 928         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 43532288    |
| train/                |             |
|    approx_kl          | 0.014016049 |
|    entropy_loss       | -1.57       |
|    explained_variance | 0.991       |
|    learning_rate      | 0.0001      |
|    loss               | 0.528       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_166.zip
[EVAL] Mean Return: 537.822, Best Return: 545.156
Saved video to ./runs_smw/videos/Enc5/Enc5_166_537.82.mp4

=== Round 168 | Learn 262144 steps (Total trained: 43778048) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1124     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 43786240 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 900         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 43794432    |
| train/                |             |
|    approx_kl          | 0.012308968 |
|    entropy_loss       | -1.61       |
|    explained_variance | 0.993       |
|    learning_rate      | 0.0001      |
|    loss               | 0.142       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_167.zip
[EVAL] Mean Return: 542.622, Best Return: 549.955
Saved video to ./runs_smw/videos/Enc5/Enc5_167_542.62.mp4

=== Round 169 | Learn 262144 steps (Total trained: 44040192) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1164     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 44048384 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 953         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 44056576    |
| train/                |             |
|    approx_kl          | 0.017765494 |
|    entropy_loss       | -1.66       |
|    explained_variance | 0.989       |
|    learning_rate      | 0.0001      |
|    loss               | 0.754       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_168.zip
[EVAL] Mean Return: 545.408, Best Return: 552.075
Saved video to ./runs_smw/videos/Enc5/Enc5_168_545.41.mp4

=== Round 170 | Learn 262144 steps (Total trained: 44302336) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1145     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 44310528 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 918         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 44318720    |
| train/                |             |
|    approx_kl          | 0.012947025 |
|    entropy_loss       | -1.59       |
|    explained_variance | 0.986       |
|    learning_rate      | 0.0001      |
|    loss               | 0.379       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_169.zip
[EVAL] Mean Return: 542.523, Best Return: 549.190
Saved video to ./runs_smw/videos/Enc5/Enc5_169_542.52.mp4

=== Round 171 | Learn 262144 steps (Total trained: 44564480) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1161     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 44572672 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 925         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 44580864    |
| train/                |             |
|    approx_kl          | 0.014012416 |
|    entropy_loss       | -1.57       |
|    explained_variance | 0.968       |
|    learning_rate      | 0.0001      |
|    loss               | 0.509       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_170.zip
[EVAL] Mean Return: 538.829, Best Return: 545.495
Saved video to ./runs_smw/videos/Enc5/Enc5_170_538.83.mp4

=== Round 172 | Learn 262144 steps (Total trained: 44826624) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1106     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 44834816 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 912         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 44843008    |
| train/                |             |
|    approx_kl          | 0.012572829 |
|    entropy_loss       | -1.6        |
|    explained_variance | 0.96        |
|    learning_rate      | 0.0001      |
|    loss               | 0.28        |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_171.zip
[EVAL] Mean Return: 542.215, Best Return: 549.548
Saved video to ./runs_smw/videos/Enc5/Enc5_171_542.22.mp4

=== Round 173 | Learn 262144 steps (Total trained: 45088768) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1155     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 45096960 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 920         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 45105152    |
| train/                |             |
|    approx_kl          | 0.014302738 |
|    entropy_loss       | -1.58       |
|    explained_variance | 0.991       |
|    learning_rate      | 0.0001      |
|    loss               | 0.287       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_172.zip
[EVAL] Mean Return: 542.735, Best Return: 550.068
Saved video to ./runs_smw/videos/Enc5/Enc5_172_542.73.mp4

=== Round 174 | Learn 262144 steps (Total trained: 45350912) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1130     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 45359104 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 919         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 45367296    |
| train/                |             |
|    approx_kl          | 0.015561959 |
|    entropy_loss       | -1.65       |
|    explained_variance | 0.984       |
|    learning_rate      | 0.0001      |
|    loss               | 1.09        |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_173.zip
[EVAL] Mean Return: 21.140, Best Return: 21.807
Saved video to ./runs_smw/videos/Enc5/Enc5_173_21.14.mp4

=== Round 175 | Learn 262144 steps (Total trained: 45613056) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1121     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 45621248 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 912         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 45629440    |
| train/                |             |
|    approx_kl          | 0.016805546 |
|    entropy_loss       | -1.62       |
|    explained_variance | 0.991       |
|    learning_rate      | 0.0001      |
|    loss               | 0.397       |
|    mean_step_reward   |

Saved checkpoint: ./runs_smw/checkpoints/Enc5_174.zip
[EVAL] Mean Return: 542.762, Best Return: 550.429
Saved video to ./runs_smw/videos/Enc5/Enc5_174_542.76.mp4

=== Round 176 | Learn 262144 steps (Total trained: 45875200) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1145     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 45883392 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 935         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 45891584    |
| train/                |             |
|    approx_kl          | 0.012738137 |
|    entropy_loss       | -1.64       |
|    explained_variance | 0.988       |
|    learning_rate      | 0.0001      |
|    loss               | 1.06        |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_175.zip
[EVAL] Mean Return: 542.932, Best Return: 549.599
Saved video to ./runs_smw/videos/Enc5/Enc5_175_542.93.mp4

=== Round 177 | Learn 262144 steps (Total trained: 46137344) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1128     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 46145536 |
---------------------------------
--------------------------------------
| time/                 |            |
|    fps                | 931        |
|    iterations         | 2          |
|    time_elapsed       | 17         |
|    total_timesteps    | 46153728   |
| train/                |            |
|    approx_kl          | 0.01783223 |
|    entropy_loss       | -1.57      |
|    explained_variance | 0.99       |
|    learning_rate      | 0.0001     |
|    loss               | 0.326      |
|    mean_step_reward   | 0.456368

Saved checkpoint: ./runs_smw/checkpoints/Enc5_176.zip
[EVAL] Mean Return: 543.462, Best Return: 550.128
Saved video to ./runs_smw/videos/Enc5/Enc5_176_543.46.mp4

=== Round 178 | Learn 262144 steps (Total trained: 46399488) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1081     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 46407680 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 896         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 46415872    |
| train/                |             |
|    approx_kl          | 0.017213155 |
|    entropy_loss       | -1.67       |
|    explained_variance | 0.984       |
|    learning_rate      | 0.0001      |
|    loss               | 0.532       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_177.zip
[EVAL] Mean Return: 540.701, Best Return: 547.368
Saved video to ./runs_smw/videos/Enc5/Enc5_177_540.70.mp4

=== Round 179 | Learn 262144 steps (Total trained: 46661632) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1110     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 46669824 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 899         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 46678016    |
| train/                |             |
|    approx_kl          | 0.018571056 |
|    entropy_loss       | -1.65       |
|    explained_variance | 0.987       |
|    learning_rate      | 0.0001      |
|    loss               | 0.877       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_178.zip
[EVAL] Mean Return: 543.260, Best Return: 549.927
Saved video to ./runs_smw/videos/Enc5/Enc5_178_543.26.mp4

=== Round 180 | Learn 262144 steps (Total trained: 46923776) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1179     |
|    iterations      | 1        |
|    time_elapsed    | 6        |
|    total_timesteps | 46931968 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 939         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 46940160    |
| train/                |             |
|    approx_kl          | 0.014438041 |
|    entropy_loss       | -1.58       |
|    explained_variance | 0.987       |
|    learning_rate      | 0.0001      |
|    loss               | 0.652       |
|    mean_step_reward 

Saved checkpoint: ./runs_smw/checkpoints/Enc5_179.zip
[EVAL] Mean Return: 543.359, Best Return: 550.025
Saved video to ./runs_smw/videos/Enc5/Enc5_179_543.36.mp4

=== Round 181 | Learn 262144 steps (Total trained: 47185920) ===
Logging to ./runs_smw/tb/Enc5_0
---------------------------------
| time/              |          |
|    fps             | 1154     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 47194112 |
---------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 934       |
|    iterations         | 2         |
|    time_elapsed       | 17        |
|    total_timesteps    | 47202304  |
| train/                |           |
|    approx_kl          | 0.0122112 |
|    entropy_loss       | -1.64     |
|    explained_variance | 0.967     |
|    learning_rate      | 0.0001    |
|    loss               | 0.306     |
|    mean_step_reward   | 0.3493291 |
|    n_u

## Display Video

In [None]:
from IPython.display import Video
import glob
# NOTE: within this cell, `Video` and `glob` are referenced only by the
# commented-out lines below; the single live statement is the `video` path.
#
# Disabled variant, kept for reference: collect every .mp4 under
# VIDEO_DIR/<label>, take the one with the newest os.path.getctime, then
# override that choice with a fixed Dec22A clip and play it inline.
# label = "Dec22A"

# list_of_files = glob.glob(os.path.join(VIDEO_DIR, label, '*.mp4')) 
# if list_of_files:
#     latest_file = max(list_of_files, key=os.path.getctime)
#     print(f"Playing: {latest_file}")
#     latest_file = "runs_smw/videos/Dec22A/Dec22A_73_596.54.mp4"
#     print(f"Playing: {latest_file}")
#     display(Video(latest_file, embed=True, width=768))
# else:
#     print("No videos found yet.")
    
# Hard-coded path of the recording to show.
video = "./runs_smw/videos/Dec22A/Dec22A_73_596.54.mp4"
# Uncomment to embed the clip in the notebook output at 768 px width.
# display(Video(video, embed=True, width=768))

In [None]:
import cv2

# Step through a recorded clip one frame at a time in an OpenCV window,
# for manual inspection of the agent's behaviour.
video_path = "runs_smw/videos/test_16.mp4"
cap = cv2.VideoCapture(video_path)

try:
    if not cap.isOpened():
        # A failed open leaves isOpened() False, so the loop below would be
        # skipped and the cell would finish without any feedback.
        print(f"Could not open video: {video_path}")

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            # No frame was returned (typically end of the clip): stop.
            break

        cv2.imshow("Frame-by-Frame", frame)

        # Key point: block here until a key is pressed. 'q' quits; any other
        # key (e.g. 'n') advances to the next frame.
        # Keep only the low byte so the comparison with ord() is not thrown
        # off if the backend returns a key code with extra high bits set.
        key = cv2.waitKey(0) & 0xFF
        if key == ord('q'):
            break
finally:
    # Always free the capture and close the window, even when the loop is
    # left through an exception or a kernel interrupt during waitKey.
    cap.release()
    cv2.destroyAllWindows()