# 2025 DL Lab8: RL Assignment_Super Mario World

**Your Answer:**    
Hi I'm XXX, XXXXXXXXXX.

## Overview
This project implements a **Deep Reinforcement Learning** pipeline to train an autonomous agent for Super Mario World. Leveraging the **Proximal Policy Optimization (PPO)** algorithm, the system interacts with the **stable-retro** environment to master the YoshiIsland1 level. Key components include a custom Vision Backbone for extracting features from raw pixel data and a suite of Environment Wrappers that handle frame preprocessing, action discretization, and reward shaping to facilitate efficient learning.

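Implement the reward function.  
The agent should handle the very beginning of the level (monster attack).  
Implement the custom PPO.  
Pre-trained weights make little difference; the reward function is what mainly matters.  
Model weight capacity: 1 GB.  
Do not change the existing class names (new ones may be added, but the original ones must stay as they are).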

## Imports

In [1]:
import os
import numpy as np
import retro
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.vec_env import VecNormalize

from eval import evaluate_policy, record_video
from custom_policy import VisionBackbonePolicy, CustomPPO

  from .autonotebook import tqdm as notebook_tqdm


## Configuration

In [2]:
# --- Game ---------------------------------------------------------------
# Game and save-state identifiers passed to the environment factory.
GAME = "SuperMarioWorld-Snes"
STATE = "YoshiIsland1"

# --- Training -----------------------------------------------------------
# Step budgets are expressed as multiples of BASE_CHUNK.
BASE_CHUNK = 8192
TRAIN_CHUNK = 32 * BASE_CHUNK    # steps requested per training round
TOTAL_STEPS = 256 * TRAIN_CHUNK  # overall step budget for the run
N_ENVS = 16                      # number of environments in the vec env

# --- Evaluation & recording ---------------------------------------------
EVAL_EPISODES = 3
EVAL_MAX_STEPS = 18000
RECORD_STEPS = 1200

# --- Output locations ---------------------------------------------------
# Everything the run writes lives under LOG_DIR.
LOG_DIR = "./runs_smw"
VIDEO_DIR, CKPT_DIR, TENSORBOARD_LOG = (
    os.path.join(LOG_DIR, _leaf) for _leaf in ("videos", "checkpoints", "tb")
)

# Create the directories this notebook writes to directly.
# TENSORBOARD_LOG is intentionally not created here.
for _path in (LOG_DIR, CKPT_DIR, VIDEO_DIR):
    os.makedirs(_path, exist_ok=True)

## Environment Functions

In [3]:
from wrappers import make_base_env
def _make_env_thunk(game: str, state: str):
    """Return a function that creates an environment (for multiprocessing)."""
    def _thunk():
        return make_base_env(game, state)
    return _thunk

def make_vec_env(game: str, state: str, n_envs: int, use_subproc: bool = True):
    """Assemble a vectorized environment holding ``n_envs`` copies of a level.

    Args:
        game: Game identifier forwarded to each environment factory.
        state: Save-state identifier forwarded to each environment factory.
        n_envs: Number of environment factories to build.
        use_subproc: When true and more than one environment is requested,
            wrap the factories in ``SubprocVecEnv``; otherwise use
            ``DummyVecEnv``.

    Returns:
        The constructed ``SubprocVecEnv`` or ``DummyVecEnv``.
    """
    factories = []
    for _ in range(n_envs):
        factories.append(_make_env_thunk(game, state))

    # A single environment always goes through DummyVecEnv, whatever the flag.
    wrapper_cls = SubprocVecEnv if (use_subproc and n_envs > 1) else DummyVecEnv
    return wrapper_cls(factories)

## Initialize Env & Model

In [4]:
# 1. Create the training environment.
train_env = make_vec_env(GAME, STATE, n_envs=N_ENVS)
# Optional normalisation wrapper, currently disabled:
# train_env = VecNormalize(train_env, norm_obs=True, norm_reward=True, clip_obs=10., clip_reward=10.)
print(f"Environment created: {GAME} - {STATE} with {N_ENVS} parallel envs.")

# Checkpoint to resume from. Point this at a path that does not exist
# (for example "None") to start from a freshly initialised model instead.
checkpoint_path = "runs_smw/checkpoints/NoArti_147.zip"

# Bookkeeping for a fresh run; the resume branch below overwrites
# `trained` and `round_idx` with values recovered from the checkpoint.
best_mean, trained, round_idx = -1e18, 0, 0

# 2. Build the model: fresh when no checkpoint file exists, otherwise resume.
if not os.path.exists(checkpoint_path):
    print(f"[Fail] Can't load {checkpoint_path}. Will use new model")
    # Hyper-parameters for a model trained from scratch.
    ppo_settings = dict(
        policy_kwargs=dict(normalize_images=False),
        n_epochs=4,
        n_steps=512,
        batch_size=512,
        learning_rate=1e-4,
        verbose=1,
        gamma=0.9875,
        gae_lambda=0.975,
        kl_coef=1,
        clip_range=0.125,
        ent_coef=0.045,
        tensorboard_log=TENSORBOARD_LOG,
    )
    model = CustomPPO(VisionBackbonePolicy, train_env, **ppo_settings)
else:
    # Load the existing model; the device is pinned to the first CUDA GPU.
    model = CustomPPO.load(checkpoint_path, env=train_env, device="cuda:0")
    # Continue the step and round counters from where the checkpoint stopped.
    trained = model.num_timesteps
    round_idx = trained // TRAIN_CHUNK
    print(f"[Sucess] Loaded model from {checkpoint_path}")
    print(f"trained: {trained}, round_index: {round_idx}")

Environment created: SuperMarioWorld-Snes - YoshiIsland1 with 16 parallel envs.
[Sucess] Loaded model from runs_smw/checkpoints/NoArti_147.zip
trained: 38797312, round_index: 148


## Training Loop

In [5]:
# Run label: names the TensorBoard run, the checkpoint files and the
# per-run video sub-directory. It does not change between rounds.
label = "NoArti"

try:
    # Train in rounds of at most TRAIN_CHUNK steps; after every round save a
    # checkpoint, evaluate the policy and record a short video.
    while trained < TOTAL_STEPS:
        round_idx += 1
        # Never request more than what is left of the overall budget.
        chunk = min(TRAIN_CHUNK, TOTAL_STEPS - trained)
        # chunk = 2000
        # The tag is derived from the step count *before* this round trains.
        tagged_label = f"{label}_{trained // TRAIN_CHUNK}"

        print(f"\n=== Round {round_idx} | Learn {chunk} steps (Total trained: {trained}) ===")

        # --- Train ---
        model.learn(total_timesteps=chunk, reset_num_timesteps=False, tb_log_name=label)
        # Take the step count from the model rather than adding `chunk`:
        # the resume path also initialises `trained` from
        # model.num_timesteps, and learn() may run more steps than requested
        # when `chunk` is not a whole number of rollouts.
        trained = model.num_timesteps

        # --- Save Checkpoint ---
        ckpt_path = os.path.join(CKPT_DIR, f"{tagged_label}.zip")
        model.save(ckpt_path)
        print(f"Saved checkpoint: {ckpt_path}")

        # --- Evaluate ---
        mean_ret, best_ret = evaluate_policy(
            model,
            GAME,
            STATE,
            n_episodes=EVAL_EPISODES,
            max_steps=EVAL_MAX_STEPS,
        )
        print(f"[EVAL] Mean Return: {mean_ret:.3f}, Best Return: {best_ret:.3f}")

        # --- Record Video ---
        # The prefix contains a sub-directory, so make sure it exists first.
        out_path = os.path.join(VIDEO_DIR, label)
        os.makedirs(out_path, exist_ok=True)
        record_video(
            model,
            GAME,
            STATE,
            VIDEO_DIR,
            video_len=RECORD_STEPS,
            prefix=f"{label}/{tagged_label}_{mean_ret:.2f}",
        )

except KeyboardInterrupt:
    # Allow stopping a long run by hand without a traceback.
    print("\nTraining interrupted manually.")

finally:
    # Always release the environments, however the loop ended.
    train_env.close()
    print("Training finished. Environment closed.")

# To inspect the training curves:
#   tensorboard --logdir=./runs_smw/tb


=== Round 149 | Learn 262144 steps (Total trained: 38797312) ===
Logging to ./runs_smw/tb/NoArti_0


---------------------------------
| time/              |          |
|    fps             | 1075     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 38805504 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 893         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 38813696    |
| train/                |             |
|    approx_kl          | 0.029153772 |
|    entropy_loss       | -1.89       |
|    explained_variance | 0.794       |
|    learning_rate      | 0.0001      |
|    loss               | 0.0381      |
|    mean_step_reward   | 0.058806583 |
|    n_updates          | 4/128       |
|    policyGradLoss     | -0.0139     |
|    value_loss         | 0.408       |
---------------------------------------
---------------------------------------
| time/                 |             |
| 

Saved checkpoint: ./runs_smw/checkpoints/NoArti_148.zip
[EVAL] Mean Return: 132.989, Best Return: 135.589
Saved video to ./runs_smw/videos/NoArti/NoArti_148_132.99.mp4

=== Round 150 | Learn 262144 steps (Total trained: 39059456) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1439     |
|    iterations      | 1        |
|    time_elapsed    | 5        |
|    total_timesteps | 39067648 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 1090        |
|    iterations         | 2           |
|    time_elapsed       | 15          |
|    total_timesteps    | 39075840    |
| train/                |             |
|    approx_kl          | 0.021987619 |
|    entropy_loss       | -2.05       |
|    explained_variance | 0.941       |
|    learning_rate      | 0.0001      |
|    loss               | 0.123       |
|    mean_step

Saved checkpoint: ./runs_smw/checkpoints/NoArti_149.zip
[EVAL] Mean Return: 80.233, Best Return: 82.633
Saved video to ./runs_smw/videos/NoArti/NoArti_149_80.23.mp4

=== Round 151 | Learn 262144 steps (Total trained: 39321600) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1171     |
|    iterations      | 1        |
|    time_elapsed    | 6        |
|    total_timesteps | 39329792 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 990         |
|    iterations         | 2           |
|    time_elapsed       | 16          |
|    total_timesteps    | 39337984    |
| train/                |             |
|    approx_kl          | 0.020859681 |
|    entropy_loss       | -2.12       |
|    explained_variance | 0.961       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0903     |
|    mean_step_re

Saved checkpoint: ./runs_smw/checkpoints/NoArti_150.zip
[EVAL] Mean Return: 134.097, Best Return: 136.697
Saved video to ./runs_smw/videos/NoArti/NoArti_150_134.10.mp4

=== Round 152 | Learn 262144 steps (Total trained: 39583744) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1426     |
|    iterations      | 1        |
|    time_elapsed    | 5        |
|    total_timesteps | 39591936 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 1071        |
|    iterations         | 2           |
|    time_elapsed       | 15          |
|    total_timesteps    | 39600128    |
| train/                |             |
|    approx_kl          | 0.035528522 |
|    entropy_loss       | -2.08       |
|    explained_variance | 0.956       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0847     |
|    mean_step

Saved checkpoint: ./runs_smw/checkpoints/NoArti_151.zip
[EVAL] Mean Return: 133.625, Best Return: 136.225
Saved video to ./runs_smw/videos/NoArti/NoArti_151_133.62.mp4

=== Round 153 | Learn 262144 steps (Total trained: 39845888) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1446     |
|    iterations      | 1        |
|    time_elapsed    | 5        |
|    total_timesteps | 39854080 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 983         |
|    iterations         | 2           |
|    time_elapsed       | 16          |
|    total_timesteps    | 39862272    |
| train/                |             |
|    approx_kl          | 0.020713303 |
|    entropy_loss       | -2.17       |
|    explained_variance | 0.953       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0822     |
|    mean_step

Saved checkpoint: ./runs_smw/checkpoints/NoArti_152.zip
[EVAL] Mean Return: 125.645, Best Return: 128.245
Saved video to ./runs_smw/videos/NoArti/NoArti_152_125.65.mp4

=== Round 154 | Learn 262144 steps (Total trained: 40108032) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1092     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 40116224 |
---------------------------------
--------------------------------------
| time/                 |            |
|    fps                | 859        |
|    iterations         | 2          |
|    time_elapsed       | 19         |
|    total_timesteps    | 40124416   |
| train/                |            |
|    approx_kl          | 0.02325277 |
|    entropy_loss       | -2.04      |
|    explained_variance | 0.97       |
|    learning_rate      | 0.0001     |
|    loss               | -0.0492    |
|    mean_step_reward   | 

Saved checkpoint: ./runs_smw/checkpoints/NoArti_153.zip
[EVAL] Mean Return: 131.616, Best Return: 134.016
Saved video to ./runs_smw/videos/NoArti/NoArti_153_131.62.mp4

=== Round 155 | Learn 262144 steps (Total trained: 40370176) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1131     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 40378368 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 887         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 40386560    |
| train/                |             |
|    approx_kl          | 0.023774642 |
|    entropy_loss       | -2.03       |
|    explained_variance | 0.987       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0523     |
|    mean_step

Saved checkpoint: ./runs_smw/checkpoints/NoArti_154.zip
[EVAL] Mean Return: 122.517, Best Return: 125.117
Saved video to ./runs_smw/videos/NoArti/NoArti_154_122.52.mp4

=== Round 156 | Learn 262144 steps (Total trained: 40632320) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1061     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 40640512 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 855         |
|    iterations         | 2           |
|    time_elapsed       | 19          |
|    total_timesteps    | 40648704    |
| train/                |             |
|    approx_kl          | 0.017158814 |
|    entropy_loss       | -2.15       |
|    explained_variance | 0.873       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0387     |
|    mean_step

Saved checkpoint: ./runs_smw/checkpoints/NoArti_155.zip
[EVAL] Mean Return: 130.640, Best Return: 133.240
Saved video to ./runs_smw/videos/NoArti/NoArti_155_130.64.mp4

=== Round 157 | Learn 262144 steps (Total trained: 40894464) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1167     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 40902656 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 903         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 40910848    |
| train/                |             |
|    approx_kl          | 0.021097202 |
|    entropy_loss       | -2.11       |
|    explained_variance | 0.968       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0665     |
|    mean_step

Saved checkpoint: ./runs_smw/checkpoints/NoArti_156.zip
[EVAL] Mean Return: 134.077, Best Return: 136.677
Saved video to ./runs_smw/videos/NoArti/NoArti_156_134.08.mp4

=== Round 158 | Learn 262144 steps (Total trained: 41156608) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1072     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 41164800 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 855         |
|    iterations         | 2           |
|    time_elapsed       | 19          |
|    total_timesteps    | 41172992    |
| train/                |             |
|    approx_kl          | 0.031176385 |
|    entropy_loss       | -2.19       |
|    explained_variance | 0.935       |
|    learning_rate      | 0.0001      |
|    loss               | -0.107      |
|    mean_step

Saved checkpoint: ./runs_smw/checkpoints/NoArti_157.zip
[EVAL] Mean Return: 133.541, Best Return: 135.941
Saved video to ./runs_smw/videos/NoArti/NoArti_157_133.54.mp4

=== Round 159 | Learn 262144 steps (Total trained: 41418752) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1181     |
|    iterations      | 1        |
|    time_elapsed    | 6        |
|    total_timesteps | 41426944 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 899         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 41435136    |
| train/                |             |
|    approx_kl          | 0.027427636 |
|    entropy_loss       | -2.13       |
|    explained_variance | 0.959       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0966     |
|    mean_step

Saved checkpoint: ./runs_smw/checkpoints/NoArti_158.zip
[EVAL] Mean Return: 3.929, Best Return: 5.737
Saved video to ./runs_smw/videos/NoArti/NoArti_158_3.93.mp4

=== Round 160 | Learn 262144 steps (Total trained: 41680896) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1129     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 41689088 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 865         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 41697280    |
| train/                |             |
|    approx_kl          | 0.0188803   |
|    entropy_loss       | -2.16       |
|    explained_variance | 0.895       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0668     |
|    mean_step_rewar

Saved checkpoint: ./runs_smw/checkpoints/NoArti_159.zip
[EVAL] Mean Return: 132.828, Best Return: 135.428
Saved video to ./runs_smw/videos/NoArti/NoArti_159_132.83.mp4

=== Round 161 | Learn 262144 steps (Total trained: 41943040) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1160     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 41951232 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 898         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 41959424    |
| train/                |             |
|    approx_kl          | 0.023676105 |
|    entropy_loss       | -2.1        |
|    explained_variance | 0.868       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0684     |
|    mean_step

Saved checkpoint: ./runs_smw/checkpoints/NoArti_160.zip
[EVAL] Mean Return: 73.647, Best Return: 75.647
Saved video to ./runs_smw/videos/NoArti/NoArti_160_73.65.mp4

=== Round 162 | Learn 262144 steps (Total trained: 42205184) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1196     |
|    iterations      | 1        |
|    time_elapsed    | 6        |
|    total_timesteps | 42213376 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 900         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 42221568    |
| train/                |             |
|    approx_kl          | 0.026592143 |
|    entropy_loss       | -2.03       |
|    explained_variance | 0.932       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0878     |
|    mean_step_re

Saved checkpoint: ./runs_smw/checkpoints/NoArti_161.zip
[EVAL] Mean Return: 134.075, Best Return: 136.675
Saved video to ./runs_smw/videos/NoArti/NoArti_161_134.08.mp4

=== Round 163 | Learn 262144 steps (Total trained: 42467328) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1543     |
|    iterations      | 1        |
|    time_elapsed    | 5        |
|    total_timesteps | 42475520 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 1148        |
|    iterations         | 2           |
|    time_elapsed       | 14          |
|    total_timesteps    | 42483712    |
| train/                |             |
|    approx_kl          | 0.025016516 |
|    entropy_loss       | -2.21       |
|    explained_variance | 0.962       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0609     |
|    mean_step

Saved checkpoint: ./runs_smw/checkpoints/NoArti_162.zip
[EVAL] Mean Return: 31.925, Best Return: 32.525
Saved video to ./runs_smw/videos/NoArti/NoArti_162_31.93.mp4

=== Round 164 | Learn 262144 steps (Total trained: 42729472) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1188     |
|    iterations      | 1        |
|    time_elapsed    | 6        |
|    total_timesteps | 42737664 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 901         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 42745856    |
| train/                |             |
|    approx_kl          | 0.014647765 |
|    entropy_loss       | -2.25       |
|    explained_variance | 0.731       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0696     |
|    mean_step_re

Saved checkpoint: ./runs_smw/checkpoints/NoArti_163.zip
[EVAL] Mean Return: -6.596, Best Return: -4.988
Saved video to ./runs_smw/videos/NoArti/NoArti_163_-6.60.mp4

=== Round 165 | Learn 262144 steps (Total trained: 42991616) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1091     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 42999808 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 869         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 43008000    |
| train/                |             |
|    approx_kl          | 0.027234478 |
|    entropy_loss       | -2.27       |
|    explained_variance | 0.942       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0942     |
|    mean_step_re

Saved checkpoint: ./runs_smw/checkpoints/NoArti_164.zip
[EVAL] Mean Return: 44.276, Best Return: 46.076
Saved video to ./runs_smw/videos/NoArti/NoArti_164_44.28.mp4

=== Round 166 | Learn 262144 steps (Total trained: 43253760) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1125     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 43261952 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 895         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 43270144    |
| train/                |             |
|    approx_kl          | 0.024470601 |
|    entropy_loss       | -2.24       |
|    explained_variance | 0.916       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0978     |
|    mean_step_re

Saved checkpoint: ./runs_smw/checkpoints/NoArti_165.zip
[EVAL] Mean Return: 85.266, Best Return: 87.266
Saved video to ./runs_smw/videos/NoArti/NoArti_165_85.27.mp4

=== Round 167 | Learn 262144 steps (Total trained: 43515904) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1061     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 43524096 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 849         |
|    iterations         | 2           |
|    time_elapsed       | 19          |
|    total_timesteps    | 43532288    |
| train/                |             |
|    approx_kl          | 0.024751732 |
|    entropy_loss       | -2.22       |
|    explained_variance | 0.946       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0997     |
|    mean_step_re

Saved checkpoint: ./runs_smw/checkpoints/NoArti_166.zip
[EVAL] Mean Return: 10.313, Best Return: 10.713
Saved video to ./runs_smw/videos/NoArti/NoArti_166_10.31.mp4

=== Round 168 | Learn 262144 steps (Total trained: 43778048) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1106     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 43786240 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 887         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 43794432    |
| train/                |             |
|    approx_kl          | 0.030906681 |
|    entropy_loss       | -2.14       |
|    explained_variance | 0.889       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0707     |
|    mean_step_re

Saved checkpoint: ./runs_smw/checkpoints/NoArti_167.zip
[EVAL] Mean Return: 75.705, Best Return: 77.705
Saved video to ./runs_smw/videos/NoArti/NoArti_167_75.71.mp4

=== Round 169 | Learn 262144 steps (Total trained: 44040192) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1075     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 44048384 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 867         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 44056576    |
| train/                |             |
|    approx_kl          | 0.012946571 |
|    entropy_loss       | -2.25       |
|    explained_variance | 0.817       |
|    learning_rate      | 0.0001      |
|    loss               | 0.0259      |
|    mean_step_re

Saved checkpoint: ./runs_smw/checkpoints/NoArti_168.zip
[EVAL] Mean Return: 129.243, Best Return: 131.843
Saved video to ./runs_smw/videos/NoArti/NoArti_168_129.24.mp4

=== Round 170 | Learn 262144 steps (Total trained: 44302336) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1166     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 44310528 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 876         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 44318720    |
| train/                |             |
|    approx_kl          | 0.020769034 |
|    entropy_loss       | -2.34       |
|    explained_variance | 0.977       |
|    learning_rate      | 0.0001      |
|    loss               | -0.107      |
|    mean_step

Saved checkpoint: ./runs_smw/checkpoints/NoArti_169.zip
[EVAL] Mean Return: 6.674, Best Return: 8.290
Saved video to ./runs_smw/videos/NoArti/NoArti_169_6.67.mp4

=== Round 171 | Learn 262144 steps (Total trained: 44564480) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1139     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 44572672 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 881         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 44580864    |
| train/                |             |
|    approx_kl          | 0.01481922  |
|    entropy_loss       | -2.32       |
|    explained_variance | 0.845       |
|    learning_rate      | 0.0001      |
|    loss               | -0.107      |
|    mean_step_rewar

Saved checkpoint: ./runs_smw/checkpoints/NoArti_170.zip
[EVAL] Mean Return: 40.096, Best Return: 42.096
Saved video to ./runs_smw/videos/NoArti/NoArti_170_40.10.mp4

=== Round 172 | Learn 262144 steps (Total trained: 44826624) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1180     |
|    iterations      | 1        |
|    time_elapsed    | 6        |
|    total_timesteps | 44834816 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 897         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 44843008    |
| train/                |             |
|    approx_kl          | 0.016490314 |
|    entropy_loss       | -2.27       |
|    explained_variance | 0.909       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0781     |
|    mean_step_re

Saved checkpoint: ./runs_smw/checkpoints/NoArti_171.zip
[EVAL] Mean Return: 132.140, Best Return: 134.740
Saved video to ./runs_smw/videos/NoArti/NoArti_171_132.14.mp4

=== Round 173 | Learn 262144 steps (Total trained: 45088768) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1163     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 45096960 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 905         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 45105152    |
| train/                |             |
|    approx_kl          | 0.018828973 |
|    entropy_loss       | -2.21       |
|    explained_variance | 0.933       |
|    learning_rate      | 0.0001      |
|    loss               | 0.0121      |
|    mean_step

Saved checkpoint: ./runs_smw/checkpoints/NoArti_172.zip
[EVAL] Mean Return: 114.769, Best Return: 117.169
Saved video to ./runs_smw/videos/NoArti/NoArti_172_114.77.mp4

=== Round 174 | Learn 262144 steps (Total trained: 45350912) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1048     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 45359104 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 891         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 45367296    |
| train/                |             |
|    approx_kl          | 0.02586947  |
|    entropy_loss       | -2.19       |
|    explained_variance | 0.965       |
|    learning_rate      | 0.0001      |
|    loss               | -0.106      |
|    mean_step

Saved checkpoint: ./runs_smw/checkpoints/NoArti_173.zip
[EVAL] Mean Return: 48.534, Best Return: 50.534
Saved video to ./runs_smw/videos/NoArti/NoArti_173_48.53.mp4

=== Round 175 | Learn 262144 steps (Total trained: 45613056) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1173     |
|    iterations      | 1        |
|    time_elapsed    | 6        |
|    total_timesteps | 45621248 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 903         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 45629440    |
| train/                |             |
|    approx_kl          | 0.020317096 |
|    entropy_loss       | -2.16       |
|    explained_variance | 0.94        |
|    learning_rate      | 0.0001      |
|    loss               | -0.0798     |
|    mean_step_re

Saved checkpoint: ./runs_smw/checkpoints/NoArti_174.zip
[EVAL] Mean Return: 131.738, Best Return: 134.338
Saved video to ./runs_smw/videos/NoArti/NoArti_174_131.74.mp4

=== Round 176 | Learn 262144 steps (Total trained: 45875200) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1110     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 45883392 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 893         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 45891584    |
| train/                |             |
|    approx_kl          | 0.021832865 |
|    entropy_loss       | -2.14       |
|    explained_variance | 0.904       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0508     |
|    mean_step

Saved checkpoint: ./runs_smw/checkpoints/NoArti_175.zip
[EVAL] Mean Return: -9.930, Best Return: -9.930
Saved video to ./runs_smw/videos/NoArti/NoArti_175_-9.93.mp4

=== Round 177 | Learn 262144 steps (Total trained: 46137344) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1158     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 46145536 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 893         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 46153728    |
| train/                |             |
|    approx_kl          | 0.024339985 |
|    entropy_loss       | -2.2        |
|    explained_variance | 0.977       |
|    learning_rate      | 0.0001      |
|    loss               | -0.107      |
|    mean_step_re

Saved checkpoint: ./runs_smw/checkpoints/NoArti_176.zip
[EVAL] Mean Return: -0.503, Best Return: -0.103
Saved video to ./runs_smw/videos/NoArti/NoArti_176_-0.50.mp4

=== Round 178 | Learn 262144 steps (Total trained: 46399488) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1072     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 46407680 |
---------------------------------
----------------------------------------
| time/                 |              |
|    fps                | 857          |
|    iterations         | 2            |
|    time_elapsed       | 19           |
|    total_timesteps    | 46415872     |
| train/                |              |
|    approx_kl          | 0.018275287  |
|    entropy_loss       | -2.38        |
|    explained_variance | 0.754        |
|    learning_rate      | 0.0001       |
|    loss               | -0.0798      |
|    

Saved checkpoint: ./runs_smw/checkpoints/NoArti_177.zip
[EVAL] Mean Return: 68.560, Best Return: 70.360
Saved video to ./runs_smw/videos/NoArti/NoArti_177_68.56.mp4

=== Round 179 | Learn 262144 steps (Total trained: 46661632) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1067     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 46669824 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 865         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 46678016    |
| train/                |             |
|    approx_kl          | 0.016717248 |
|    entropy_loss       | -2.21       |
|    explained_variance | 0.87        |
|    learning_rate      | 0.0001      |
|    loss               | 0.0129      |
|    mean_step_re

Saved checkpoint: ./runs_smw/checkpoints/NoArti_178.zip
[EVAL] Mean Return: -7.632, Best Return: -6.024
Saved video to ./runs_smw/videos/NoArti/NoArti_178_-7.63.mp4

=== Round 180 | Learn 262144 steps (Total trained: 46923776) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1096     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 46931968 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 891         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 46940160    |
| train/                |             |
|    approx_kl          | 0.0168985   |
|    entropy_loss       | -2.29       |
|    explained_variance | 0.886       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0983     |
|    mean_step_re

Saved checkpoint: ./runs_smw/checkpoints/NoArti_179.zip
[EVAL] Mean Return: 36.854, Best Return: 38.054
Saved video to ./runs_smw/videos/NoArti/NoArti_179_36.85.mp4

=== Round 181 | Learn 262144 steps (Total trained: 47185920) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1105     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 47194112 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 898         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 47202304    |
| train/                |             |
|    approx_kl          | 0.022358393 |
|    entropy_loss       | -2.24       |
|    explained_variance | 0.951       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0995     |
|    mean_step_re

Saved checkpoint: ./runs_smw/checkpoints/NoArti_180.zip
[EVAL] Mean Return: 131.460, Best Return: 134.060
Saved video to ./runs_smw/videos/NoArti/NoArti_180_131.46.mp4

=== Round 182 | Learn 262144 steps (Total trained: 47448064) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1076     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 47456256 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 886         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 47464448    |
| train/                |             |
|    approx_kl          | 0.021735588 |
|    entropy_loss       | -2.18       |
|    explained_variance | 0.958       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0839     |
|    mean_step

Saved checkpoint: ./runs_smw/checkpoints/NoArti_181.zip
[EVAL] Mean Return: -64.756, Best Return: -62.748
Saved video to ./runs_smw/videos/NoArti/NoArti_181_-64.76.mp4

=== Round 183 | Learn 262144 steps (Total trained: 47710208) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1104     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 47718400 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 900         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 47726592    |
| train/                |             |
|    approx_kl          | 0.024745742 |
|    entropy_loss       | -2.27       |
|    explained_variance | 0.949       |
|    learning_rate      | 0.0001      |
|    loss               | -0.105      |
|    mean_step

Saved checkpoint: ./runs_smw/checkpoints/NoArti_182.zip
[EVAL] Mean Return: 136.819, Best Return: 139.219
Saved video to ./runs_smw/videos/NoArti/NoArti_182_136.82.mp4

=== Round 184 | Learn 262144 steps (Total trained: 47972352) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1068     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 47980544 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 866         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 47988736    |
| train/                |             |
|    approx_kl          | 0.023304807 |
|    entropy_loss       | -2.26       |
|    explained_variance | 0.908       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0849     |
|    mean_step

Saved checkpoint: ./runs_smw/checkpoints/NoArti_183.zip
[EVAL] Mean Return: 61.772, Best Return: 63.572
Saved video to ./runs_smw/videos/NoArti/NoArti_183_61.77.mp4

=== Round 185 | Learn 262144 steps (Total trained: 48234496) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1102     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 48242688 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 909         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 48250880    |
| train/                |             |
|    approx_kl          | 0.018614765 |
|    entropy_loss       | -2.25       |
|    explained_variance | 0.975       |
|    learning_rate      | 0.0001      |
|    loss               | -0.104      |
|    mean_step_re

Saved checkpoint: ./runs_smw/checkpoints/NoArti_184.zip
[EVAL] Mean Return: -9.905, Best Return: -9.905
Saved video to ./runs_smw/videos/NoArti/NoArti_184_-9.91.mp4

=== Round 186 | Learn 262144 steps (Total trained: 48496640) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1109     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 48504832 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 885         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 48513024    |
| train/                |             |
|    approx_kl          | 0.021749636 |
|    entropy_loss       | -2.27       |
|    explained_variance | 0.975       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0984     |
|    mean_step_re

Saved checkpoint: ./runs_smw/checkpoints/NoArti_185.zip
[EVAL] Mean Return: 132.459, Best Return: 135.059
Saved video to ./runs_smw/videos/NoArti/NoArti_185_132.46.mp4

=== Round 187 | Learn 262144 steps (Total trained: 48758784) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1087     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 48766976 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 897         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 48775168    |
| train/                |             |
|    approx_kl          | 0.018649984 |
|    entropy_loss       | -2.14       |
|    explained_variance | 0.872       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0523     |
|    mean_step

Saved checkpoint: ./runs_smw/checkpoints/NoArti_186.zip
[EVAL] Mean Return: 129.969, Best Return: 132.169
Saved video to ./runs_smw/videos/NoArti/NoArti_186_129.97.mp4

=== Round 188 | Learn 262144 steps (Total trained: 49020928) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1097     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 49029120 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 910         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 49037312    |
| train/                |             |
|    approx_kl          | 0.026590962 |
|    entropy_loss       | -2.3        |
|    explained_variance | 0.974       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0986     |
|    mean_step

Saved checkpoint: ./runs_smw/checkpoints/NoArti_187.zip
[EVAL] Mean Return: 130.920, Best Return: 133.520
Saved video to ./runs_smw/videos/NoArti/NoArti_187_130.92.mp4

=== Round 189 | Learn 262144 steps (Total trained: 49283072) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1126     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 49291264 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 895         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 49299456    |
| train/                |             |
|    approx_kl          | 0.014667103 |
|    entropy_loss       | -2.17       |
|    explained_variance | 0.82        |
|    learning_rate      | 0.0001      |
|    loss               | -0.0704     |
|    mean_step

Saved checkpoint: ./runs_smw/checkpoints/NoArti_188.zip
[EVAL] Mean Return: 132.297, Best Return: 134.897
Saved video to ./runs_smw/videos/NoArti/NoArti_188_132.30.mp4

=== Round 190 | Learn 262144 steps (Total trained: 49545216) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1112     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 49553408 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 907         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 49561600    |
| train/                |             |
|    approx_kl          | 0.01777405  |
|    entropy_loss       | -2.24       |
|    explained_variance | 0.891       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0549     |
|    mean_step

Saved checkpoint: ./runs_smw/checkpoints/NoArti_189.zip
[EVAL] Mean Return: 40.361, Best Return: 40.961
Saved video to ./runs_smw/videos/NoArti/NoArti_189_40.36.mp4

=== Round 191 | Learn 262144 steps (Total trained: 49807360) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1095     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 49815552 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 904         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 49823744    |
| train/                |             |
|    approx_kl          | 0.025450103 |
|    entropy_loss       | -2.29       |
|    explained_variance | 0.845       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0892     |
|    mean_step_re

Saved checkpoint: ./runs_smw/checkpoints/NoArti_190.zip
[EVAL] Mean Return: 32.730, Best Return: 34.730
Saved video to ./runs_smw/videos/NoArti/NoArti_190_32.73.mp4

=== Round 192 | Learn 262144 steps (Total trained: 50069504) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1105     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 50077696 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 907         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 50085888    |
| train/                |             |
|    approx_kl          | 0.026195016 |
|    entropy_loss       | -2.26       |
|    explained_variance | 0.937       |
|    learning_rate      | 0.0001      |
|    loss               | -0.113      |
|    mean_step_re

Saved checkpoint: ./runs_smw/checkpoints/NoArti_191.zip
[EVAL] Mean Return: -57.527, Best Return: -55.515
Saved video to ./runs_smw/videos/NoArti/NoArti_191_-57.53.mp4

=== Round 193 | Learn 262144 steps (Total trained: 50331648) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1119     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 50339840 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 905         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 50348032    |
| train/                |             |
|    approx_kl          | 0.029093191 |
|    entropy_loss       | -2.14       |
|    explained_variance | 0.978       |
|    learning_rate      | 0.0001      |
|    loss               | -0.087      |
|    mean_step

Saved checkpoint: ./runs_smw/checkpoints/NoArti_192.zip
[EVAL] Mean Return: 69.661, Best Return: 71.661
Saved video to ./runs_smw/videos/NoArti/NoArti_192_69.66.mp4

=== Round 194 | Learn 262144 steps (Total trained: 50593792) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1152     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 50601984 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 925         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 50610176    |
| train/                |             |
|    approx_kl          | 0.022211557 |
|    entropy_loss       | -2.24       |
|    explained_variance | 0.926       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0885     |
|    mean_step_re

Saved checkpoint: ./runs_smw/checkpoints/NoArti_193.zip
[EVAL] Mean Return: -66.971, Best Return: -64.963
Saved video to ./runs_smw/videos/NoArti/NoArti_193_-66.97.mp4

=== Round 195 | Learn 262144 steps (Total trained: 50855936) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1113     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 50864128 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 914         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 50872320    |
| train/                |             |
|    approx_kl          | 0.02605456  |
|    entropy_loss       | -2.22       |
|    explained_variance | 0.956       |
|    learning_rate      | 0.0001      |
|    loss               | -0.105      |
|    mean_step

Saved checkpoint: ./runs_smw/checkpoints/NoArti_194.zip
[EVAL] Mean Return: 132.369, Best Return: 134.969
Saved video to ./runs_smw/videos/NoArti/NoArti_194_132.37.mp4

=== Round 196 | Learn 262144 steps (Total trained: 51118080) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1149     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 51126272 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 907         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 51134464    |
| train/                |             |
|    approx_kl          | 0.03319686  |
|    entropy_loss       | -2.17       |
|    explained_variance | 0.977       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0851     |
|    mean_step

Saved checkpoint: ./runs_smw/checkpoints/NoArti_195.zip
[EVAL] Mean Return: 133.263, Best Return: 135.863
Saved video to ./runs_smw/videos/NoArti/NoArti_195_133.26.mp4

=== Round 197 | Learn 262144 steps (Total trained: 51380224) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1091     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 51388416 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 910         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 51396608    |
| train/                |             |
|    approx_kl          | 0.024465518 |
|    entropy_loss       | -2.18       |
|    explained_variance | 0.97        |
|    learning_rate      | 0.0001      |
|    loss               | -0.098      |
|    mean_step

Saved checkpoint: ./runs_smw/checkpoints/NoArti_196.zip
[EVAL] Mean Return: 65.766, Best Return: 67.766
Saved video to ./runs_smw/videos/NoArti/NoArti_196_65.77.mp4

=== Round 198 | Learn 262144 steps (Total trained: 51642368) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1090     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 51650560 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 902         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 51658752    |
| train/                |             |
|    approx_kl          | 0.021012146 |
|    entropy_loss       | -2.25       |
|    explained_variance | 0.915       |
|    learning_rate      | 0.0001      |
|    loss               | -0.08       |
|    mean_step_re

Saved checkpoint: ./runs_smw/checkpoints/NoArti_197.zip
[EVAL] Mean Return: 1.039, Best Return: 1.239
Saved video to ./runs_smw/videos/NoArti/NoArti_197_1.04.mp4

=== Round 199 | Learn 262144 steps (Total trained: 51904512) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1109     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 51912704 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 887         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 51920896    |
| train/                |             |
|    approx_kl          | 0.021753587 |
|    entropy_loss       | -2.33       |
|    explained_variance | 0.981       |
|    learning_rate      | 0.0001      |
|    loss               | -0.108      |
|    mean_step_rewar

Saved checkpoint: ./runs_smw/checkpoints/NoArti_198.zip
[EVAL] Mean Return: -67.101, Best Return: -65.097
Saved video to ./runs_smw/videos/NoArti/NoArti_198_-67.10.mp4

=== Round 200 | Learn 262144 steps (Total trained: 52166656) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1152     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 52174848 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 927         |
|    iterations         | 2           |
|    time_elapsed       | 17          |
|    total_timesteps    | 52183040    |
| train/                |             |
|    approx_kl          | 0.022205576 |
|    entropy_loss       | -2.21       |
|    explained_variance | 0.945       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0772     |
|    mean_step

Saved checkpoint: ./runs_smw/checkpoints/NoArti_199.zip
[EVAL] Mean Return: 69.009, Best Return: 71.009
Saved video to ./runs_smw/videos/NoArti/NoArti_199_69.01.mp4

=== Round 201 | Learn 262144 steps (Total trained: 52428800) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1058     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 52436992 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 891         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 52445184    |
| train/                |             |
|    approx_kl          | 0.020315535 |
|    entropy_loss       | -2.28       |
|    explained_variance | 0.929       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0447     |
|    mean_step_re

Saved checkpoint: ./runs_smw/checkpoints/NoArti_200.zip
[EVAL] Mean Return: 109.770, Best Return: 112.170
Saved video to ./runs_smw/videos/NoArti/NoArti_200_109.77.mp4

=== Round 202 | Learn 262144 steps (Total trained: 52690944) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1061     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 52699136 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 884         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 52707328    |
| train/                |             |
|    approx_kl          | 0.030667774 |
|    entropy_loss       | -2.18       |
|    explained_variance | 0.949       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0942     |
|    mean_step

Saved checkpoint: ./runs_smw/checkpoints/NoArti_201.zip
[EVAL] Mean Return: -5.639, Best Return: -5.439
Saved video to ./runs_smw/videos/NoArti/NoArti_201_-5.64.mp4

=== Round 203 | Learn 262144 steps (Total trained: 52953088) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1104     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 52961280 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 877         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 52969472    |
| train/                |             |
|    approx_kl          | 0.019783279 |
|    entropy_loss       | -2.26       |
|    explained_variance | 0.897       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0854     |
|    mean_step_re

Saved checkpoint: ./runs_smw/checkpoints/NoArti_202.zip
[EVAL] Mean Return: 43.383, Best Return: 45.183
Saved video to ./runs_smw/videos/NoArti/NoArti_202_43.38.mp4

=== Round 204 | Learn 262144 steps (Total trained: 53215232) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1085     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 53223424 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 884         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 53231616    |
| train/                |             |
|    approx_kl          | 0.026201654 |
|    entropy_loss       | -2.22       |
|    explained_variance | 0.968       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0984     |
|    mean_step_re

Saved checkpoint: ./runs_smw/checkpoints/NoArti_203.zip
[EVAL] Mean Return: 132.188, Best Return: 134.788
Saved video to ./runs_smw/videos/NoArti/NoArti_203_132.19.mp4

=== Round 205 | Learn 262144 steps (Total trained: 53477376) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1096     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 53485568 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 891         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 53493760    |
| train/                |             |
|    approx_kl          | 0.021659661 |
|    entropy_loss       | -2.29       |
|    explained_variance | 0.959       |
|    learning_rate      | 0.0001      |
|    loss               | -0.094      |
|    mean_step

Saved checkpoint: ./runs_smw/checkpoints/NoArti_204.zip
[EVAL] Mean Return: -69.305, Best Return: -67.473
Saved video to ./runs_smw/videos/NoArti/NoArti_204_-69.31.mp4

=== Round 206 | Learn 262144 steps (Total trained: 53739520) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1108     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 53747712 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 905         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 53755904    |
| train/                |             |
|    approx_kl          | 0.018980835 |
|    entropy_loss       | -2.21       |
|    explained_variance | 0.927       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0794     |
|    mean_step

Saved checkpoint: ./runs_smw/checkpoints/NoArti_205.zip
[EVAL] Mean Return: 39.074, Best Return: 41.074
Saved video to ./runs_smw/videos/NoArti/NoArti_205_39.07.mp4

=== Round 207 | Learn 262144 steps (Total trained: 54001664) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1098     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 54009856 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 896         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 54018048    |
| train/                |             |
|    approx_kl          | 0.019905657 |
|    entropy_loss       | -2.31       |
|    explained_variance | 0.906       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0928     |
|    mean_step_re

Saved checkpoint: ./runs_smw/checkpoints/NoArti_206.zip
[EVAL] Mean Return: 126.793, Best Return: 129.393
Saved video to ./runs_smw/videos/NoArti/NoArti_206_126.79.mp4

=== Round 208 | Learn 262144 steps (Total trained: 54263808) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1105     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 54272000 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 886         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 54280192    |
| train/                |             |
|    approx_kl          | 0.025749221 |
|    entropy_loss       | -2.22       |
|    explained_variance | 0.973       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0997     |
|    mean_step

Saved checkpoint: ./runs_smw/checkpoints/NoArti_207.zip
[EVAL] Mean Return: 127.901, Best Return: 130.301
Saved video to ./runs_smw/videos/NoArti/NoArti_207_127.90.mp4

=== Round 209 | Learn 262144 steps (Total trained: 54525952) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1131     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 54534144 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 896         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 54542336    |
| train/                |             |
|    approx_kl          | 0.024885118 |
|    entropy_loss       | -2.16       |
|    explained_variance | 0.967       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0994     |
|    mean_step

Saved checkpoint: ./runs_smw/checkpoints/NoArti_208.zip
[EVAL] Mean Return: 63.782, Best Return: 65.582
Saved video to ./runs_smw/videos/NoArti/NoArti_208_63.78.mp4

=== Round 210 | Learn 262144 steps (Total trained: 54788096) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1108     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 54796288 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 893         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 54804480    |
| train/                |             |
|    approx_kl          | 0.032167938 |
|    entropy_loss       | -2.07       |
|    explained_variance | 0.98        |
|    learning_rate      | 0.0001      |
|    loss               | -0.0515     |
|    mean_step_re

Saved checkpoint: ./runs_smw/checkpoints/NoArti_209.zip
[EVAL] Mean Return: 1.704, Best Return: 1.904
Saved video to ./runs_smw/videos/NoArti/NoArti_209_1.70.mp4

=== Round 211 | Learn 262144 steps (Total trained: 55050240) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1102     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 55058432 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 901         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 55066624    |
| train/                |             |
|    approx_kl          | 0.034680873 |
|    entropy_loss       | -2.21       |
|    explained_variance | 0.959       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0975     |
|    mean_step_rewar

Saved checkpoint: ./runs_smw/checkpoints/NoArti_210.zip
[EVAL] Mean Return: 87.029, Best Return: 89.029
Saved video to ./runs_smw/videos/NoArti/NoArti_210_87.03.mp4

=== Round 212 | Learn 262144 steps (Total trained: 55312384) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1133     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 55320576 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 901         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 55328768    |
| train/                |             |
|    approx_kl          | 0.03165003  |
|    entropy_loss       | -2.13       |
|    explained_variance | 0.943       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0852     |
|    mean_step_re

Saved checkpoint: ./runs_smw/checkpoints/NoArti_211.zip
[EVAL] Mean Return: 85.119, Best Return: 87.319
Saved video to ./runs_smw/videos/NoArti/NoArti_211_85.12.mp4

=== Round 213 | Learn 262144 steps (Total trained: 55574528) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1067     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 55582720 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 891         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 55590912    |
| train/                |             |
|    approx_kl          | 0.021776054 |
|    entropy_loss       | -2.16       |
|    explained_variance | 0.904       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0768     |
|    mean_step_re

Saved checkpoint: ./runs_smw/checkpoints/NoArti_212.zip
[EVAL] Mean Return: 124.869, Best Return: 127.469
Saved video to ./runs_smw/videos/NoArti/NoArti_212_124.87.mp4

=== Round 214 | Learn 262144 steps (Total trained: 55836672) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1112     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 55844864 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 889         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 55853056    |
| train/                |             |
|    approx_kl          | 0.031205764 |
|    entropy_loss       | -2.27       |
|    explained_variance | 0.981       |
|    learning_rate      | 0.0001      |
|    loss               | -0.11       |
|    mean_step

Saved checkpoint: ./runs_smw/checkpoints/NoArti_213.zip
[EVAL] Mean Return: -6.346, Best Return: -6.146
Saved video to ./runs_smw/videos/NoArti/NoArti_213_-6.35.mp4

=== Round 215 | Learn 262144 steps (Total trained: 56098816) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1122     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 56107008 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 906         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 56115200    |
| train/                |             |
|    approx_kl          | 0.019846871 |
|    entropy_loss       | -2.24       |
|    explained_variance | 0.935       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0371     |
|    mean_step_re

Saved checkpoint: ./runs_smw/checkpoints/NoArti_214.zip
[EVAL] Mean Return: 90.364, Best Return: 92.364
Saved video to ./runs_smw/videos/NoArti/NoArti_214_90.36.mp4

=== Round 216 | Learn 262144 steps (Total trained: 56360960) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1091     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 56369152 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 875         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 56377344    |
| train/                |             |
|    approx_kl          | 0.020892035 |
|    entropy_loss       | -2.22       |
|    explained_variance | 0.979       |
|    learning_rate      | 0.0001      |
|    loss               | -0.104      |
|    mean_step_re

Saved checkpoint: ./runs_smw/checkpoints/NoArti_215.zip
[EVAL] Mean Return: 88.132, Best Return: 90.332
Saved video to ./runs_smw/videos/NoArti/NoArti_215_88.13.mp4

=== Round 217 | Learn 262144 steps (Total trained: 56623104) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1144     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 56631296 |
---------------------------------
--------------------------------------
| time/                 |            |
|    fps                | 899        |
|    iterations         | 2          |
|    time_elapsed       | 18         |
|    total_timesteps    | 56639488   |
| train/                |            |
|    approx_kl          | 0.01941209 |
|    entropy_loss       | -2.23      |
|    explained_variance | 0.95       |
|    learning_rate      | 0.0001     |
|    loss               | -0.0704    |
|    mean_step_reward   | 0.0

Saved checkpoint: ./runs_smw/checkpoints/NoArti_216.zip
[EVAL] Mean Return: 58.141, Best Return: 60.341
Saved video to ./runs_smw/videos/NoArti/NoArti_216_58.14.mp4

=== Round 218 | Learn 262144 steps (Total trained: 56885248) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1083     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 56893440 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 890         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 56901632    |
| train/                |             |
|    approx_kl          | 0.031993434 |
|    entropy_loss       | -2.17       |
|    explained_variance | 0.953       |
|    learning_rate      | 0.0001      |
|    loss               | -0.0735     |
|    mean_step_re

Saved checkpoint: ./runs_smw/checkpoints/NoArti_217.zip
[EVAL] Mean Return: 75.105, Best Return: 77.305
Saved video to ./runs_smw/videos/NoArti/NoArti_217_75.11.mp4

=== Round 219 | Learn 262144 steps (Total trained: 57147392) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1085     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 57155584 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 902         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 57163776    |
| train/                |             |
|    approx_kl          | 0.018036636 |
|    entropy_loss       | -2.27       |
|    explained_variance | 0.957       |
|    learning_rate      | 0.0001      |
|    loss               | -0.102      |
|    mean_step_re

Saved checkpoint: ./runs_smw/checkpoints/NoArti_218.zip
[EVAL] Mean Return: -66.515, Best Return: -64.511
Saved video to ./runs_smw/videos/NoArti/NoArti_218_-66.51.mp4

=== Round 220 | Learn 262144 steps (Total trained: 57409536) ===
Logging to ./runs_smw/tb/NoArti_0
---------------------------------
| time/              |          |
|    fps             | 1099     |
|    iterations      | 1        |
|    time_elapsed    | 7        |
|    total_timesteps | 57417728 |
---------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 898         |
|    iterations         | 2           |
|    time_elapsed       | 18          |
|    total_timesteps    | 57425920    |
| train/                |             |
|    approx_kl          | 0.018112961 |
|    entropy_loss       | -2.35       |
|    explained_variance | 0.969       |
|    learning_rate      | 0.0001      |
|    loss               | -0.101      |
|    mean_step

Saved checkpoint: ./runs_smw/checkpoints/NoArti_219.zip

Training interrupted manually.
Training finished. Environment closed.


'\ntensorboard --logdir=./runs_smw/tb\n'

## Display Video

In [None]:
# Preview a recorded rollout inline in the notebook.
# `display` is imported explicitly: IPython only provides it implicitly
# inside an interactive session, so the bare name fails if this cell is
# exported to a script or module.
from IPython.display import Video, display

# Hard-coded path to the clip to show.
# NOTE(review): the training log above saves videos under
# ./runs_smw/videos/NoArti/ — confirm this file exists before running.
video = "./runs_smw/videos/test_126.mp4"

# embed=True stores the video data in the cell output itself rather than
# linking to the file on disk.
display(Video(video, embed=True, width=768))

In [None]:
"""
[070] coins: 12 | score: 3540
[071] coins: 10 | score: 2260
[072] coins: 11 | score: 2760
[073] coins:  2 | score:  690
[074] coins: 12 | score: 3450
[075] coins: 12 | score: 3515
[076] coins: 12 | score: 3545
[077] coins: 12 | score: 3545
[078] coins: 10 | score: 2460
[079] coins: 12 | score: 3515
[080] coins: 12 | score: 3580
[081] coins: 11 | score: 2750
[082] coins: 12 | score: 3545
[083] coins: 12 | score: 3565
[084] coins: 11 | score: 3475
[085] coins:  0 | score:    0
[086] coins: 12 | score: 3535
[087] coins: 12 | score: 3560
[088] coins:  9 | score: 1420
[089] coins: 11 | score: 3640
[090] coins:  1 | score:  380
[091] coins: 10 | score: 2440
[092] coins: 12 | score: 3570
[093] coins: 12 | score: 3490
[094] coins: 11 | score: 2745
[095] coins: 12 | score: 3565
[096] coins:  0 | score:    0
[097] coins: 12 | score: 3490
[098] coins: 12 | score: 3570
[099] coins:  2 | score:  560
[100] coins:  2 | score:  660
[101] coins: 12 | score: 3580
[102] coins:  9 | score: 1420
[103] coins: 12 | score: 3575
[104] coins: 12 | score: 3585
[105] coins: 12 | score: 3580
[106] coins: 12 | score: 3525
[107] coins:  2 | score:  540
[108] coins:  2 | score:  660
[109] coins: 10 | score: 2420
[110] coins:  1 | score:  140
[111] coins: 11 | score: 2680
[112] coins:  2 | score:  580
[113] coins:  2 | score:  580
[114] coins:  2 | score:  560
[115] coins: 11 | score: 2765
[116] coins:  2 | score:  560
[117] coins:  0 | score:    0
[118] coins: 12 | score: 3570
[119] coins:  1 | score:  340
[120] coins: 11 | score: 2735
[121] coins: 12 | score: 3570
[122] coins: 12 | score: 3515
[123] coins: 12 | score: 3580
[124] coins: 12 | score: 3585
[125] coins: 12 | score: 3560
[126] coins: 12 | score: 3595
[127] coins: 12 | score: 3515

所有測試結束。
在 reward 紀錄上，紀錄前10幀的 action 是甚麼，然後檢查

--Run--
[57] coins: 12 | score: 3630
[58] coins: 12 | score: 3490
[59] coins: 11 | score: 2855
[60] coins: 12 | score: 3620
[61] coins: 12 | score: 3690
[62] coins: 12 | score: 3685
[63] coins: 11 | score: 2860
[64] coins: 10 | score: 2245
[65] coins: 12 | score: 3685
[66] coins: 12 | score: 3670
[67] coins: 12 | score: 3565
[68] coins: 12 | score: 3575
[69] coins: 11 | score: 2880
[70] coins: 12 | score: 3685

[72] coins: 9 | score: 1315
[73] coins: 12 | score: 3535
[74] coins: 12 | score: 3570
[75] coins: 12 | score: 3645
[76] coins: 12 | score: 3690
[77] coins: 8 | score: 1020
[78] coins: 12 | score: 3695
[79] coins: 11 | score: 2780
[80] coins: 12 | score: 3695
[81] coins: 12 | score: 3695
[82] coins: 12 | score: 3655
[83] coins: 12 | score: 3690
[84] coins: 12 | score: 3635
[85] coins: 11 | score: 2840
[86] coins: 12 | score: 3680
[87] coins: 12 | score: 3645
[88] coins: 12 | score: 3625
[89] coins: 0 | score: 0
[90] coins: 12 | score: 3650
[91] coins: 12 | score: 3695
[92] coins: 12 | score: 3695
[93] coins: 12 | score: 3700
[94] coins: 12 | score: 3700
[95] coins: 12 | score: 3625
[96] coins: 12 | score: 3700
[97] coins: 12 | score: 3705
[98] coins: 12 | score: 3695
[99] coins: 2 | score: 640
[100] coins: 12 | score: 3705
[101] coins: 9 | score: 1400
[102] coins: 9 | score: 1420
[103] coins: 12 | score: 3705
[104] coins: 2 | score: 770
[105] coins: 12 | score: 3705
[106] coins: 12 | score: 3705
[107] coins: 1 | score: 380
[108] coins: 12 | score: 3745 *
[109] coins: 12 | score: 3660
[110] coins: 11 | score: 2915
[111] coins: 5 | score: 2810
[112] coins: 12 | score: 3680
[113] coins: 12 | score: 3540
[114] coins: 11 | score: 2860
[115] coins: 12 | score: 3740
[116] coins: 12 | score: 3730
[117] coins: 12 | score: 3725
[118] coins: 12 | score: 3680
[119] coins: 6 | score: 3650
[120] coins: 12 | score: 3745
[121] coins: 12 | score: 3770 *
[122] coins: 11 | score: 2885
[123] coins: 12 | score: 3720
[124] coins: 12 | score: 3710
[125] coins: 11 | score: 2685
[126] coins: 12 | score: 3510
[127] coins: 12 | score: 3750 *
[128] coins: 12 | score: 3730
[129] coins: 12 | score: 3635
[130] coins: 12 | score: 3730
[131] coins: 11 | score: 2745
[132] coins: 12 | score: 3720
[133] coins: 12 | score: 3760 *
[134] coins: 12 | score: 3730
[135] coins: 12 | score: 3735
[136] coins: 12 | score: 3715
[137] coins: 12 | score: 3730
[138] coins: 12 | score: 3670
[139] coins: 12 | score: 3730
[140] coins: 11 | score: 3685
[141] coins: 13 | score: 3690
[142] coins: 13 | score: 3740
[143] coins: 13 | score: 3740
[144] coins: 13 | score: 3735
[145] coins: 13 | score: 3675

--Nature--
[148] coins: 6 | score: 3745
[149] coins: 6 | score: 3800
[150] coins: 6 | score: 3790
[151] coins: 6 | score: 3740
[152] coins: 5 | score: 3725
[153] coins: 6 | score: 3745
[154] coins: 6 | score: 3635
[155] coins: 2 | score: 820
[156] coins: 6 | score: 3870 *
[157] coins: 6 | score: 3750
[158] coins: 6 | score: 3850
[159] coins: 6 | score: 3855 *
"""