In [1]:
%%capture
%%bash
# Install ViZDoom deps from
# https://github.com/mwydmuch/ViZDoom/blob/master/doc/Building.md#-linux
#
# `-y` pre-answers apt's confirmation prompt: this cell runs as a
# non-interactive script with its output captured, so nobody could answer the
# prompt and the install would otherwise be aborted without any visible hint.

apt-get install -y build-essential zlib1g-dev libsdl2-dev libjpeg-dev \
nasm tar libbz2-dev libgtk2.0-dev cmake git libfluidsynth-dev libgme-dev \
libopenal-dev timidity libwildmidi-dev unzip ffmpeg

# Boost libraries
apt-get install -y libboost-all-dev

# Lua binding dependencies
apt-get install -y liblua5.1-dev

In [2]:
# install python libraries
# thanks toinsson
!pip install faster-fifo==1.4.2
!pip install vizdoom

Collecting faster-fifo==1.4.2
  Downloading faster-fifo-1.4.2.tar.gz (84 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.6/84.6 kB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: faster-fifo
  Building wheel for faster-fifo (pyproject.toml) ... [?25l[?25hdone
  Created wheel for faster-fifo: filename=faster_fifo-1.4.2-cp310-cp310-linux_x86_64.whl size=333548 sha256=6bfa3eff0f9c2a049fc44c55e1d5f7e974d89856b93d6a4c66ca4dd82f1ed760
  Stored in directory: /root/.cache/pip/wheels/e9/72/37/2c9091826a1ceb1e7ece278f0e074e80be349a05fdb0a53d0d
Successfully built faster-fifo
Installing collected packages: faster-fifo
Successfully installed faster-fifo-1.4.2
Collecting vizdoom
  Downloading vizdoom-1.2.3-cp310-cp310-manylinux_2_28_x86_64.whl (28.0 MB)
[2K     [90

In [4]:
# Install Sample Factory, pinned to release 2.0.2
!pip install sample-factory==2.0.2



In [5]:
import functools

from sample_factory.algo.utils.context import global_model_factory
from sample_factory.cfg.arguments import parse_full_cfg, parse_sf_args
from sample_factory.envs.env_utils import register_env
from sample_factory.train import run_rl

from sf_examples.vizdoom.doom.doom_model import make_vizdoom_encoder
from sf_examples.vizdoom.doom.doom_params import add_doom_env_args, doom_override_defaults
from sf_examples.vizdoom.doom.doom_utils import DOOM_ENVS, make_doom_env_from_spec


def register_vizdoom_envs():
    """Register every ViZDoom environment spec listed in DOOM_ENVS.

    Each spec is registered under its own ``name`` with a factory that has the
    spec pre-bound as the first argument of ``make_doom_env_from_spec``.
    """
    for spec in DOOM_ENVS:
        register_env(spec.name, functools.partial(make_doom_env_from_spec, spec))

def register_vizdoom_models():
    """Register the ViZDoom encoder factory with the global model factory.

    Sample Factory allows the registration of a custom neural network
    architecture; see
    https://github.com/alex-petrenko/sample-factory/blob/master/sf_examples/vizdoom/doom/doom_model.py
    for more details.
    """
    model_factory = global_model_factory()
    model_factory.register_encoder_factory(make_vizdoom_encoder)


def register_vizdoom_components():
    """Register the ViZDoom environments first, then the ViZDoom models."""
    for register in (register_vizdoom_envs, register_vizdoom_models):
        register()

def parse_vizdoom_cfg(argv=None, evaluation=False):
    """Parse command line style arguments and build the final configuration.

    Args:
        argv: argument list handed to both parsing passes.
        evaluation: forwarded to ``parse_sf_args``.

    Returns:
        The configuration produced by the second parsing pass
        (``parse_full_cfg``).
    """
    # first pass: obtain the parser (the second element of the result is unused)
    parser, _partial_cfg = parse_sf_args(argv=argv, evaluation=evaluation)

    # add the Doom-specific parameters, then override the Doom defaults for the
    # algo parameters -- in that order
    for extend_parser in (add_doom_env_args, doom_override_defaults):
        extend_parser(parser)

    # second pass yields the final configuration
    return parse_full_cfg(parser, argv)

In [6]:
# Start the training, this should take around 15 minutes
register_vizdoom_components()

# Today's training scenario is health gathering. Other scenarios include:
#   "doom_basic", "doom_two_colors_easy", "doom_dm", "doom_dwango5",
#   "doom_my_way_home", "doom_deadly_corridor", "doom_defend_the_center",
#   "doom_defend_the_line"
env = "doom_health_gathering_supreme"
cfg = parse_vizdoom_cfg(
    argv=[
        f"--env={env}",
        "--num_workers=8",
        "--num_envs_per_worker=4",
        "--train_for_env_steps=5000000",
    ]
)

status = run_rl(cfg)

[36m[2024-01-06 06:02:44,264][00204] register_encoder_factory: <function make_vizdoom_encoder at 0x7a26467b4670>[0m
[33m[2024-01-06 06:02:44,276][00204] Saved parameter configuration for experiment default_experiment not found![0m
[33m[2024-01-06 06:02:44,277][00204] Starting experiment from scratch![0m
[36m[2024-01-06 06:02:44,293][00204] Experiment dir /content/train_dir/default_experiment already exists![0m
[36m[2024-01-06 06:02:44,294][00204] Resuming existing experiment from /content/train_dir/default_experiment...[0m
[36m[2024-01-06 06:02:44,297][00204] Weights and Biases integration disabled[0m
[36m[2024-01-06 06:02:46,746][00204] Queried available GPUs: 0
[0m
[37m[1m[2024-01-06 06:02:46,748][00204] Environment var CUDA_VISIBLE_DEVICES is 0
[0m
[36m[2024-01-06 06:02:48,380][00204] Starting experiment with the following configuration:
help=False
algo=APPO
env=doom_health_gathering_supreme
experiment=default_experiment
train_dir=/content/train_dir
restart_behavio

In [7]:
from sample_factory.enjoy import enjoy

# Build an evaluation config for the environment selected in `env`; the flags
# ask for a single worker, a saved video, no rendering and at most 10 episodes.
cfg = parse_vizdoom_cfg(
    argv=[
        f"--env={env}",
        "--num_workers=1",
        "--save_video",
        "--no_render",
        "--max_num_episodes=10",
    ],
    evaluation=True,
)
status = enjoy(cfg)

[33m[2024-01-06 06:30:53,323][00204] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json[0m
[36m[2024-01-06 06:30:53,326][00204] Overriding arg 'num_workers' with value 1 passed from command line[0m
[36m[2024-01-06 06:30:53,328][00204] Adding new argument 'no_render'=True that is not in the saved config file![0m
[36m[2024-01-06 06:30:53,330][00204] Adding new argument 'save_video'=True that is not in the saved config file![0m
[36m[2024-01-06 06:30:53,332][00204] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file![0m
[36m[2024-01-06 06:30:53,334][00204] Adding new argument 'video_name'=None that is not in the saved config file![0m
[36m[2024-01-06 06:30:53,335][00204] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file![0m
[36m[2024-01-06 06:30:53,336][00204] Adding new argument 'max_num_episodes'=10 that is not in the saved config file![0m
[36m[2024-01-06 06

In [8]:
from base64 import b64encode
from IPython.display import HTML

# Read the replay inside a `with` block so the file handle is closed as soon as
# the bytes are in memory (a bare open(...).read() leaves it open).
with open('/content/train_dir/default_experiment/replay.mp4', 'rb') as video_file:
    mp4 = video_file.read()

# Embed the video in the page as a base64 data URL
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
HTML("""
<video width=640 controls>
      <source src="%s" type="video/mp4">
</video>
""" % data_url)

In [9]:
from huggingface_hub import notebook_login
notebook_login()
!git config --global credential.helper store

# !huggingface-cli login --token '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'

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [10]:
from sample_factory.enjoy import enjoy

hf_username = "Weiming1122"  # insert your HuggingFace username here

# Evaluation config whose flags additionally cap the number of frames and ask
# for the result to be pushed to the given Hugging Face repository.
cfg = parse_vizdoom_cfg(
    argv=[
        f"--env={env}",
        "--num_workers=1",
        "--save_video",
        "--no_render",
        "--max_num_episodes=10",
        "--max_num_frames=100000",
        "--push_to_hub",
        f"--hf_repository={hf_username}/rl_course_vizdoom_health_gathering_supreme",
    ],
    evaluation=True,
)
status = enjoy(cfg)

[33m[2024-01-06 06:36:51,286][00204] Loading existing experiment configuration from /content/train_dir/default_experiment/config.json[0m
[36m[2024-01-06 06:36:51,288][00204] Overriding arg 'num_workers' with value 1 passed from command line[0m
[36m[2024-01-06 06:36:51,290][00204] Adding new argument 'no_render'=True that is not in the saved config file![0m
[36m[2024-01-06 06:36:51,292][00204] Adding new argument 'save_video'=True that is not in the saved config file![0m
[36m[2024-01-06 06:36:51,295][00204] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file![0m
[36m[2024-01-06 06:36:51,296][00204] Adding new argument 'video_name'=None that is not in the saved config file![0m
[36m[2024-01-06 06:36:51,299][00204] Adding new argument 'max_num_frames'=100000 that is not in the saved config file![0m
[36m[2024-01-06 06:36:51,300][00204] Adding new argument 'max_num_episodes'=10 that is not in the saved config file![0m
[36m[2024-01-06 06:36:51

events.out.tfevents.1704520964.5f74293c5f3a:   0%|          | 0.00/702k [00:00<?, ?B/s]

best_000001105_4526080_reward_25.856.pth:   0%|          | 0.00/34.9M [00:00<?, ?B/s]

Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]

checkpoint_000001222_5005312.pth:   0%|          | 0.00/34.9M [00:00<?, ?B/s]

checkpoint_000001135_4648960.pth:   0%|          | 0.00/34.9M [00:00<?, ?B/s]

replay.mp4:   0%|          | 0.00/21.7M [00:00<?, ?B/s]

[37m[1m[2024-01-06 06:38:58,362][00204] The model has been pushed to https://huggingface.co/Weiming1122/rl_course_vizdoom_health_gathering_supreme[0m


In [11]:
# Download the agent from the hub:
#   -r  repository to fetch (edbeeching/doom_health_gathering_supreme_2222)
#   -d  local directory that receives the download (./train_dir)
!python -m sample_factory.huggingface.load_from_hub -r edbeeching/doom_health_gathering_supreme_2222 -d ./train_dir

For more details, please read https://huggingface.co/docs/huggingface_hub/concepts/git_vs_http.
Cloning https://huggingface.co/edbeeching/doom_health_gathering_supreme_2222 into local empty directory.
Download file checkpoint_p0/best_000466273_3819708416_reward_63.056.pth:   0% 100k/39.3M [00:01<09:47, 70.0kB/s]
Download file checkpoint_p0/best_000466273_3819708416_reward_63.056.pth:   3% 1.11M/39.3M [00:02<01:01, 654kB/s]
Download file replay.mp4:   1% 24.4k/4.07M [00:01<04:04, 17.4kB/s][A

Download file .summary/0/events.out.tfevents.1666554178.r12i0n7:   0% 31.4k/8.77M [00:00<?, ?B/s][A[A


Download file checkpoint_p0/checkpoint_000538763_4413546496.pth:   0% 32.0k/39.3M [00:00<?, ?B/s][A[A[A



Download file checkpoint_p0/best_000466273_3819708416_reward_63.056.pth:  20% 7.86M/39.3M [00:03<00:09, 3.58MB/s]

Download file .summary/0/events.out.tfevents.1666554178.r12i0n7:   3% 282k/8.77M [00:01<00:34, 256kB/s][A[A


Download file checkpoint_p0/checkpoint_000538763_4413546496

In [12]:
# List the contents of the downloaded experiment directory
!ls train_dir/doom_health_gathering_supreme_2222

cfg.json  checkpoint_p0  README.md  replay.mp4


In [13]:
# Evaluate the downloaded agent: the experiment name and train dir point at the
# directory the hub download was placed in.
env = "doom_health_gathering_supreme"
cfg = parse_vizdoom_cfg(
    argv=[
        f"--env={env}",
        "--num_workers=1",
        "--save_video",
        "--no_render",
        "--max_num_episodes=10",
        "--experiment=doom_health_gathering_supreme_2222",
        "--train_dir=train_dir",
    ],
    evaluation=True,
)
status = enjoy(cfg)

[33m[2024-01-06 06:40:02,998][00204] Loading legacy config file train_dir/doom_health_gathering_supreme_2222/cfg.json instead of train_dir/doom_health_gathering_supreme_2222/config.json[0m
[33m[2024-01-06 06:40:03,000][00204] Loading existing experiment configuration from train_dir/doom_health_gathering_supreme_2222/config.json[0m
[36m[2024-01-06 06:40:03,002][00204] Overriding arg 'experiment' with value 'doom_health_gathering_supreme_2222' passed from command line[0m
[36m[2024-01-06 06:40:03,004][00204] Overriding arg 'train_dir' with value 'train_dir' passed from command line[0m
[36m[2024-01-06 06:40:03,006][00204] Overriding arg 'num_workers' with value 1 passed from command line[0m
[36m[2024-01-06 06:40:03,008][00204] Adding new argument 'env_gpu_observations'=True that is not in the saved config file![0m
[36m[2024-01-06 06:40:03,010][00204] Adding new argument 'no_render'=True that is not in the saved config file![0m
[36m[2024-01-06 06:40:03,011][00204] Adding new 

In [14]:
# Read the replay inside a `with` block so the file handle is closed as soon as
# the bytes are in memory (a bare open(...).read() leaves it open).
with open('/content/train_dir/doom_health_gathering_supreme_2222/replay.mp4', 'rb') as video_file:
    mp4 = video_file.read()

# Embed the video in the page as a base64 data URL
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
HTML("""
<video width=640 controls>
      <source src="%s" type="video/mp4">
</video>
""" % data_url)

In [15]:
# Download the agent from the hub:
#   -r  repository to fetch (edbeeching/doom_deathmatch_bots_2222)
#   -d  local directory that receives the download (./train_dir)
!python -m sample_factory.huggingface.load_from_hub -r edbeeching/doom_deathmatch_bots_2222 -d ./train_dir

For more details, please read https://huggingface.co/docs/huggingface_hub/concepts/git_vs_http.
Cloning https://huggingface.co/edbeeching/doom_deathmatch_bots_2222 into local empty directory.
Download file checkpoint_p0/best_000270111_2212749312_reward_93.067.pth:   0% 175k/39.5M [00:01<04:01, 171kB/s]
Download file replay.mp4:   0% 15.4k/6.94M [00:00<?, ?B/s][A

Download file checkpoint_p0/checkpoint_000281648_2307260416.pth:   0% 16.5k/39.5M [00:00<?, ?B/s][A[A


Download file checkpoint_p0/checkpoint_000282220_2311946240.pth:   0% 31.4k/39.5M [00:00<?, ?B/s][A[A[A



Download file .summary/0/events.out.tfevents.1666554187.r13i1n3:   0% 32.0k/9.32M [00:00<?, ?B/s][A[A[A[A

Download file checkpoint_p0/best_000270111_2212749312_reward_93.067.pth:   8% 3.15M/39.5M [00:02<00:20, 1.90MB/s]


Download file checkpoint_p0/checkpoint_000282220_2311946240.pth:   0% 99.4k/39.5M [00:01<09:54, 69.5kB/s][A[A[A
Download file replay.mp4:   2% 148k/6.94M [00:01<00:52, 135kB/s][A



Down

In [16]:

from sample_factory.enjoy import enjoy

register_vizdoom_components()

# Evaluate the downloaded deathmatch agent for a single episode
env = "doom_deathmatch_bots"
cfg = parse_vizdoom_cfg(argv=[f"--env={env}", "--num_workers=1", "--save_video", "--no_render", "--max_num_episodes=1", "--experiment=doom_deathmatch_bots_2222", "--train_dir=train_dir"], evaluation=True)
status = enjoy(cfg)

# Read the replay inside a `with` block so the file handle is closed as soon as
# the bytes are in memory (a bare open(...).read() leaves it open).
with open('/content/train_dir/doom_deathmatch_bots_2222/replay.mp4', 'rb') as video_file:
    mp4 = video_file.read()

# Embed the video in the page as a base64 data URL
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
HTML("""
<video width=640 controls>
      <source src="%s" type="video/mp4">
</video>
""" % data_url)