# Lux AI Deep Reinforcement Learning Environment Example
See https://github.com/glmcdona/LuxPythonEnvGym for environment project and updates.

This is a Python replica of the Lux game engine, built to speed up training. It reformulates the agent problem as making one action decision per unit for the team.

In [8]:
import torch
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


In [9]:
import argparse
import glob
import os
import random
from typing import Callable

from stable_baselines3 import PPO  # pip install stable-baselines3
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3.common.vec_env import SubprocVecEnv

from importlib import reload
import agent_policy
# reload(agent_policy) # Reload the file from disk incase the above agent-writing cell block was edited
from agent_policy import AgentPolicy

from luxai2021.env.agent import Agent
from luxai2021.env.lux_env import LuxEnvironment
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from luxai2021.game.constants import LuxMatchConfigs_Default
import torch.nn as nn
import gym
import torch.nn.functional as F




# Match settings: the stock Lux configuration, used as-is
configs = LuxMatchConfigs_Default

# Default opponent agent for the learner to play against
opponent = Agent()

# The RL agent, constructed in training mode
player = AgentPolicy(mode="train")

# Wire the match settings and both agents into the Lux environment
env = LuxEnvironment(
    configs=configs,
    learning_agent=player,
    opponent_agent=opponent,
)

class BasicConv2d(nn.Module):
    """A single 2-D convolution, optionally followed by batch normalization.

    The convolution pads each spatial axis by half the kernel size along that
    axis (integer division). No activation is applied here.

    Args:
        input_dim: number of input channels.
        output_dim: number of output channels.
        kernel_size: indexable pair ``(height, width)`` of the kernel.
        bn: when truthy, append a ``BatchNorm2d`` over the output channels.
    """

    def __init__(self, input_dim, output_dim, kernel_size, bn):
        super().__init__()
        pad_h = kernel_size[0] // 2
        pad_w = kernel_size[1] // 2
        self.conv = nn.Conv2d(input_dim, output_dim, kernel_size=kernel_size, padding=(pad_h, pad_w))
        if bn:
            self.bn = nn.BatchNorm2d(output_dim)
        else:
            self.bn = None

    def forward(self, x):
        """Apply the convolution, then batch norm if it was enabled."""
        out = self.conv(x)
        if self.bn is None:
            return out
        return self.bn(out)


class LuxNet(BaseFeaturesExtractor):
    """Residual CNN used as the policy's features extractor.

    A 3x3 stem convolution is followed by 8 residual 3x3 convolution blocks
    (32 filters each, all with batch norm). The resulting feature map is
    multiplied by the first input channel, summed over all spatial positions,
    and projected by a bias-free linear layer to ``features_dim``.

    Args:
        observation_space: observation space; ``shape[0]`` is taken as the
            number of input channels.
        features_dim: size of the returned feature vector.
    """

    def __init__(self, observation_space: gym.spaces.Box, features_dim: int = 256):
        super().__init__(observation_space, features_dim)
        in_channels = observation_space.shape[0]
        num_blocks = 8
        width = 32
        self.conv0 = BasicConv2d(in_channels, width, (3, 3), True)
        residual_convs = []
        for _ in range(num_blocks):
            residual_convs.append(BasicConv2d(width, width, (3, 3), True))
        self.blocks = nn.ModuleList(residual_convs)
        self.head_p = nn.Linear(width, features_dim, bias=False)

    def forward(self, x):
        """Map a batch of observations to ``features_dim``-sized vectors."""
        h = F.relu_(self.conv0(x))
        for block in self.blocks:
            # Residual connection around each conv block
            h = F.relu_(h + block(h))
        # Weight every spatial position by input channel 0 before pooling.
        # NOTE(review): presumably channel 0 marks the acting unit's
        # position - confirm against the observation built in AgentPolicy.
        weighted = h * x[:, :1]
        pooled = weighted.view(weighted.size(0), weighted.size(1), -1).sum(-1)
        return self.head_p(pooled)

# Use LuxNet as the features extractor (128 features out) and pass an empty
# net_arch alongside it.
policy_kwargs = {
    "features_extractor_class": LuxNet,
    "features_extractor_kwargs": {"features_dim": 128},
    "net_arch": [],
}

# Define the model, you can pick other RL algos from Stable Baselines3 instead if you like
model = PPO(
    "MlpPolicy",
    env,
    verbose=1,
    tensorboard_log="./lux_tensorboard_featurecnn2/",
    learning_rate=0.001,
    gamma=0.999,
    gae_lambda=0.95,
    batch_size=4096,
    n_steps=2048 * 8,
    policy_kwargs=policy_kwargs,
)

# Learning rate schedule: one (number of steps, learning_rate) pair per
# training phase.
schedule = [
    # (2000000, 0.01),
    (1, 0.001),
    # (10, 0.0001),
]

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


Running in inference-only mode.


In [10]:
# Warm-start the policy from previously saved weights.
# - map_location: deserialize the tensors onto the device the model uses, so
#   a checkpoint saved on a GPU machine also loads on a CPU-only one.
# - strict=False: tolerate keys the checkpoint does not contain (the recorded
#   run reports value_net.weight / value_net.bias as missing); the returned
#   key report is the cell's displayed output.
# NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
# NOTE(review): the absolute path is machine-specific; adjust before re-running.
pretrained_state = torch.load(
    "/home/ubuntu/work/codes/rl_openai/model_state_dict",
    map_location=model.device,
)
model.policy.load_state_dict(pretrained_state, strict=False)

_IncompatibleKeys(missing_keys=['value_net.weight', 'value_net.bias'], unexpected_keys=[])

In [11]:
from stable_baselines3.common.utils import get_schedule_fn

print("Training model...")
run_id = 1
path = "models_transfer"

# Checkpoint callback: save_freq of 1M, written under ./<path>/ with a
# run-specific file prefix
checkpoint_callback = CheckpointCallback(
    save_freq=1000000,
    save_path=f'./{path}/',
    name_prefix=f'rl_model_{run_id}',
)

# Train the policy one schedule phase at a time: install that phase's
# learning rate, then continue training without resetting the step counter.
for steps, learning_rate in schedule:
    model.lr_schedule = get_schedule_fn(learning_rate)
    model.learn(
        total_timesteps=steps,
        callback=checkpoint_callback,
        reset_num_timesteps=False,
    )

# Save final model
model.save(path=f'{path}/model.zip')

print("Done training model.")

Training model...
Logging to ./lux_tensorboard_featurecnn2/PPO_0
rew/r_total=0.03,rew/r_wood=0.01,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.02,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.00,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=111.00,game/research=7.00,game/unit_count=0.00,game/cart_count=0.00,game/city_count=0.00,game/city_tiles=0.00,game/wood_rate_mined=0.01,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
rew/r_total=1.60,rew/r_wood=0.22,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.14,rew/r_city_tiles_end=1.00,rew/r_fuel_collected=0.10,rew/r_units=0.05,rew/r_city_tiles=0.10,game/turns=360.00,game/research=54.00,game/unit_count=1.00,game/cart_count=0.00,game/city_count=1.00,game/city_tiles=1.00,game/wood_rate_mined=0.22,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 912      |
|    ep_rew_mean     | 4.57     |
| time/              |          |
| 

KeyboardInterrupt: 

# Set up a Kaggle Submission and lux replay environment for the agent

In [5]:
"""
This downloads the python package dependencies that are not pre-installed
by Kaggle yet, plus the `kaggle_submissions` folder from the environment
repository.

This places the following three folders in the current working directory:
    luxai2021
    kaggle_submissions
    stable_baselines3
"""

import os
import shutil
import subprocess
import tempfile

def localize_package(git, branch, folder):
    """Copy one top-level folder of a git repository into the working directory.

    If ``folder`` already exists in the current directory this only prints a
    notice. Otherwise the given branch is shallow-cloned into a temporary
    directory, ``folder`` is moved out of the clone into ``.``, and the
    temporary directory is removed (also when cloning or moving fails).

    Args:
        git: URL (or path) of the repository to clone.
        branch: branch name to check out.
        folder: name of the folder, relative to the repository root, to keep.

    Raises:
        RuntimeError: if ``git clone`` exits with a non-zero status.
    """
    if os.path.exists(folder):
        print("Already localized %s" % folder)
        return

    # https://stackoverflow.com/questions/51239168/how-to-download-single-file-from-a-git-repository-using-python
    # Create temporary dir
    t = tempfile.mkdtemp()
    try:
        args = ['git', 'clone', '--depth=1', '-b', branch, git, t]
        # Capture stderr as well: git reports its errors there, and the exit
        # status - not the presence of output - is what signals failure.
        res = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        if res.returncode != 0:
            raise RuntimeError(
                "git clone of %s (branch %s) failed with exit code %d: %s"
                % (git, branch, res.returncode,
                   res.stderr.decode(errors="replace").strip())
            )
        print(res.stdout)

        # Copy desired folder from temporary dir
        shutil.move(os.path.join(t, folder), '.')
    finally:
        # Remove temporary dir, whether or not the steps above succeeded
        shutil.rmtree(t, ignore_errors=True)

# (repository URL, branch, top-level folder to localize), fetched in order
for _repo_url, _repo_branch, _repo_folder in (
    ('https://github.com/glmcdona/LuxPythonEnvGym.git', 'main', 'luxai2021'),
    ('https://github.com/glmcdona/LuxPythonEnvGym.git', 'main', 'kaggle_submissions'),
    ('https://github.com/DLR-RM/stable-baselines3.git', 'master', 'stable_baselines3'),
):
    localize_package(_repo_url, _repo_branch, _repo_folder)

b''
b''
b''


In [6]:
# Move the dependent packages into kaggle submissions
!mv luxai2021 kaggle_submissions
!mv stable_baselines3 kaggle_submissions
!rm ./kaggle_submissions/agent_policy.py
!cp agent_policy.py kaggle_submissions

# Copy the agent and model to the submission 
!cp ./agent_policy.py kaggle_submissions
!cp ./models_transfer/model.zip kaggle_submissions

!ls kaggle_submissions

rm: cannot remove './kaggle_submissions/agent_policy.py': No such file or directory
agent_policy.py		  luxai2021  main_lux-ai-2021.py  stable_baselines3
download_dependencies.py  main.py    model.zip


In [7]:
from kaggle_environments import make
import json
# Create a seeded Kaggle "lux_ai_2021" environment with debug enabled
env = make(
    "lux_ai_2021",
    configuration={"seed": 5621242, "loglevel": 2, "annotations": True},
    debug=True,
)

# Self-play: the same submission entry point is used for both players
steps = env.run([
    "./kaggle_submissions/main.py",
    "./kaggle_submissions/main.py",
])

Loading environment football failed: No module named 'gfootball'
Running in inference-only mode.
Traceback (most recent call last):
  File "./main_lux-ai-2021.py", line 33, in <module>
    env.reset()  # This will automatically run the game since there is
  File "/home/ubuntu/work/codes/rl_openai/kaggle_submissions/luxai2021/env/lux_env.py", line 178, in reset
    (unit, city_tile, team, is_new_turn) = next(self.match_generator)
  File "/home/ubuntu/work/codes/rl_openai/kaggle_submissions/luxai2021/game/match_controller.py", line 263, in run_to_next_observation
    actions = agent.process_turn(self.game, agent.team)
  File "/home/ubuntu/work/codes/rl_openai/kaggle_submissions/agent_policy.py", line 562, in process_turn
    action_code, _states = self.model.predict(obs, deterministic=False)
  File "/home/ubuntu/work/codes/rl_openai/kaggle_submissions/stable_baselines3/common/base_class.py", line 544, in predict
    return self.policy.predict(observation, state, mask, deterministic)
  Fi

In [12]:
# Render the match held by `env` inline in the notebook (mode="ipython"),
# at 1200x800
env.render(mode="ipython", width=1200, height=800)

# Prepare and submit the kaggle submission

In [11]:
# Bundle the contents of kaggle_submissions/ into submission.tar.gz:
# -c create, -z gzip, -f archive name; -C switches into the folder first so
# the archived paths are relative to it (".").
!tar -czf submission.tar.gz -C kaggle_submissions .
# List the working directory to confirm the archive was written
!ls

__notebook__.ipynb  agent_policy.py	log.txt		 models
__pycache__	    kaggle_submissions	lux_tensorboard  submission.tar.gz
