## Creating An Environment With Simple Movement
Code adapted from: https://pypi.org/project/gym-super-mario-bros/

In [1]:
# Sanity check: confirm PyTorch can reach a GPU through CUDA before any training starts.
# Known valid setup is pytorch 1.8.2 LTS (need python <=3.8, >=3.6) & cuda 11.3,
# at least for wsl2 (Ubuntu 18.04 LTS) w/ Nvidia RTX 3070
import torch

torch_can_see_cuda_device = torch.cuda.is_available()
cuda_report = "Can torch see a GPU via cuda? {}"

if torch_can_see_cuda_device:
    print(cuda_report.format("Yes"))
else:
    print(cuda_report.format("No"))
    # Deliberately attempt a CUDA allocation: with no usable device this is
    # expected to fail, surfacing PyTorch's own, more detailed error message.
    torch.zeros(1).cuda()

  from .autonotebook import tqdm as notebook_tqdm


Can torch see a GPU via cuda? Yes


In [2]:
from nes_py.wrappers import JoypadSpace
import gym_super_mario_bros
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT, RIGHT_ONLY, COMPLEX_MOVEMENT
from stable_baselines3 import PPO
from stable_baselines3.common import atari_wrappers
from stable_baselines3.common.vec_env import DummyVecEnv
import os

# Environment version numbers. create_env() substitutes the chosen value into
# the 'SuperMarioBros-v{}' environment id, so each name selects one version
# (presumably the rendering variants described by gym-super-mario-bros -- verify
# against that package's documentation).
RAW, DOWNSAMPLE, PIXEL, RECTANGLE = range(4)

def create_env(version, action_space):
    """Build a single, fully wrapped Super Mario Bros environment.

    Args:
        version: Environment version number, formatted into the
            'SuperMarioBros-v{}' id (see RAW / DOWNSAMPLE / PIXEL / RECTANGLE).
        action_space: Button-combination list handed to JoypadSpace, e.g.
            SIMPLE_MOVEMENT, RIGHT_ONLY or COMPLEX_MOVEMENT.

    Returns:
        The environment wrapped in JoypadSpace and then in AtariWrapper.
    """
    env_id = 'SuperMarioBros-v{}'.format(version)
    joypad_env = JoypadSpace(gym_super_mario_bros.make(env_id), action_space)
    # AtariWrapper converts frames to greyscale and downsamples them to 84x84.
    return atari_wrappers.AtariWrapper(
        joypad_env,
        noop_max=5,
        terminal_on_life_loss=False,
    )
    
def create_vec_env(version, action_space, n):
    """Build a DummyVecEnv holding ``n`` environments made by create_env.

    Args:
        version: Environment version number forwarded to create_env.
        action_space: Button-combination list forwarded to create_env.
        n: Number of environment factories handed to DummyVecEnv.

    Returns:
        A DummyVecEnv constructed from ``n`` zero-argument factories.
    """
    def make_single_env():
        # DummyVecEnv wants callables, so construction is deferred until it calls this.
        return create_env(version, action_space)

    env_factories = [make_single_env] * n
    return DummyVecEnv(env_factories)


In [None]:
# Train a PPO agent on the RECTANGLE version with SIMPLE_MOVEMENT actions,
# checkpointing along the way and saving the final model.
from stable_baselines3.common.callbacks import CheckpointCallback

n_envs = 128                           # number of parallel emulator instances
total_timesteps = int(5e6)             # training budget, in environment timesteps
checkpoint_every_timesteps = int(1e6)  # desired checkpoint spacing, in timesteps

# assuming running in the src dir (or in its parent, the repository root)
cwd = os.getcwd()
if os.path.basename(cwd) == "src":
    model_path = "{}/models/ppo".format(cwd)
else:
    model_path = "{}/src/models/ppo".format(cwd)
print("Model will be saved to: {}".format(model_path))
assert os.path.isdir(model_path), "model directory does not exist: {}".format(model_path)

# hard-coded to different mount
log_path = "/mnt/e/SMB/ppo/logs"
print("Logs will be saved to: {}".format(log_path))
assert os.path.isdir(log_path), "log directory does not exist: {}".format(log_path)

# CheckpointCallback counts calls to the vectorised env.step(), and each call
# advances n_envs timesteps, so the timestep spacing is divided by n_envs.
# Without this, 1e6 calls * 128 envs would exceed the whole training budget
# and no checkpoint would ever be written.
checkpoint_callback = CheckpointCallback(save_freq=max(checkpoint_every_timesteps // n_envs, 1),
                                         save_path=log_path,
                                         name_prefix='smbv3_model_3')

# Paths are validated above, before the environments are created, so a missing
# directory does not leave emulator instances open.
training_env = create_vec_env(RECTANGLE, SIMPLE_MOVEMENT, n_envs)
try:
    # Using Pre-implemented algo
    model = PPO("CnnPolicy", training_env, verbose=1, batch_size=64, n_steps=1024,
                tensorboard_log="{}/tensorboard/".format(log_path))
    model.learn(total_timesteps=total_timesteps, callback=checkpoint_callback)
    model.save("{}/SuperMarioBrosRandomStages_{}_{}_{}_model_2".format(model_path, RECTANGLE, "SIMPLE_MOVEMENT", "CnnPolicy"))
finally:
    # Always release the environments, even if training fails or is interrupted.
    training_env.close()

Model will be saved to: /home/whiffingj/dev/uni/CM50270_CW2/src/models/ppo
Logs will be saved to: /mnt/e/SMB/ppo/logs
Using cuda device
Wrapping the env in a VecTransposeImage.
Logging to /mnt/e/SMB/ppo/logs/tensorboard/PPO_1
-------------------------------
| time/              |        |
|    fps             | 149    |
|    iterations      | 1      |
|    time_elapsed    | 874    |
|    total_timesteps | 131072 |
-------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 129         |
|    iterations           | 2           |
|    time_elapsed         | 2023        |
|    total_timesteps      | 262144      |
| train/                  |             |
|    approx_kl            | 0.014555465 |
|    clip_fraction        | 0.175       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.93       |
|    explained_variance   | 0.00605     |
|    learning_rate        | 0.0003      |
|   

In [None]:
# Load a saved PPO model, run it in a single test environment and record a video.
from stable_baselines3.common.vec_env import VecVideoRecorder

test_step_limit = 5000
# save to e drive
video_path = "/mnt/e/SMB/ppo/video"

# Resolve the models directory the same way as the training cell: works when
# the notebook runs from the src dir or from its parent.
cwd = os.getcwd()
if os.path.basename(cwd) == "src":
    models_dir = "{}/models/ppo".format(cwd)
else:
    models_dir = "{}/src/models/ppo".format(cwd)
# TODO: make ability to pick model?
model_path = "{}/SuperMarioBrosRandomStages_{}_{}_{}_model".format(models_dir, PIXEL, "RIGHT_ONLY", "CnnPolicy")

# Validate inputs before creating the environment so a bad path cannot leak it.
assert os.path.isfile("{}.zip".format(model_path)), "saved model not found: {}.zip".format(model_path)
assert os.path.isdir(video_path), "video directory does not exist: {}".format(video_path)

# Viewing what it has managed to learn
print("setting-up env")
test_env = create_vec_env(PIXEL, RIGHT_ONLY, 1)
try:
    print("Loading model from: {}".format(model_path))
    model = PPO.load(model_path)

    print("Setting up recorder")
    # The recorder is a wrapper: it only captures frames when stepping goes
    # through the wrapped object, so its return value must replace test_env.
    test_env = VecVideoRecorder(test_env, video_folder=video_path,
                                record_video_trigger=lambda step: step == 0,
                                video_length=test_step_limit, name_prefix="smb_test")

    print("resetting env")
    obs = test_env.reset()
    for _ in range(test_step_limit):
        action, _states = model.predict(obs, deterministic=True)
        # No manual reset on `done`: the vectorised env resets a finished
        # episode itself and returns the fresh observation, and an explicit
        # reset() on the recorder would restart the recording.
        obs, reward, done, info = test_env.step(action)
        test_env.render()
finally:
    # Closing the (possibly wrapped) env releases the emulator and lets the
    # recorder finish writing the video.
    test_env.close()

setting-up env
Loading model from: /home/whiffingj/dev/uni/CM50270_CW2/src/models/ppo/SuperMarioBrosRandomStages_2_RIGHT_ONLY_CnnPolicy_model


In [None]:
# Safety net: if training or evaluation was interrupted, run this cell to make
# sure both environments are closed. Any failure (including the variable never
# having been defined) is reported rather than raised.
for close_env, failure_message in (
    (lambda: training_env.close(), "training_env already closed / doesn't exist:\n{}"),
    (lambda: test_env.close(), "test_env already closed / doesn't exist:\n{}"),
):
    try:
        close_env()
    except Exception as ex:
        print(failure_message.format(ex))