In [1]:
%%writefile submission.py
import base64
import pickle
import zlib
import gym
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.modules.container import Sequential
# from torch.distributions.categorical import Categorical
from kaggle_environments.envs.hungry_geese.hungry_geese import Action


class Extractor(nn.Module):
    """Convolutional feature extractor: two unpadded convolutions, then one dense layer.

    Args:
        kernel_size: kernel size passed to both ``nn.Conv2d`` layers (int or
            ``(height, width)`` tuple).
        features_dim: width of the feature vector returned by ``forward``.
        input_hw: ``(height, width)`` of the planes given to ``forward``. With the
            default ``(9, 13)`` and a 3x3 kernel the dense layer has 2880 inputs,
            which is the value that used to be hard-coded.

    Raises:
        ValueError: if the kernel is too large for ``input_hw``.
    """

    def __init__(self, kernel_size, features_dim=512, input_hw=(9, 13)):
        super(Extractor, self).__init__()
        n_channels = 11
        self.conv1 = nn.Conv2d(n_channels, 32, kernel_size=kernel_size)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=kernel_size)
        self.flatten = nn.Flatten(start_dim=1)

        # Each stride-1, unpadded convolution trims (k - 1) cells from an axis,
        # and there are two of them in a row.
        k_h, k_w = self.conv1.kernel_size
        out_h = input_hw[0] - 2 * (k_h - 1)
        out_w = input_hw[1] - 2 * (k_w - 1)
        if out_h < 1 or out_w < 1:
            raise ValueError(
                f"kernel_size {kernel_size!r} is too large for input of size {tuple(input_hw)!r}"
            )
        self.fc3 = nn.Linear(self.conv2.out_channels * out_h * out_w, features_dim)

    def forward(self, x):
        """Map a ``(batch, 11, height, width)`` tensor to ``(batch, features_dim)`` features."""
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.flatten(x)
        x = F.relu(self.fc3(x))
        return x


class MlpExtractor(nn.Module):
    """Shared trunk followed by separate policy and value branches.

    Each of the three stages is one 512 -> 512 linear layer followed by a ReLU.
    """

    def __init__(self):
        super().__init__()
        width = 512
        # Registration order (shared, policy, value) fixes both the state-dict
        # layout and the order in which the layers draw their initial weights.
        for stage in ("shared_net", "policy_net", "value_net"):
            setattr(self, stage, Sequential(nn.Linear(width, width), nn.ReLU()))

    def forward(self, x):
        """Return ``(policy_latent, value_latent)`` computed from the shared trunk."""
        trunk = self.shared_net(x)
        return self.policy_net(trunk), self.value_net(trunk)


class ActorCriticPolicy(nn.Module):
    """Actor-critic network: conv features -> shared/branch MLP -> action and value heads.

    ``forward`` returns ``(action_logits, value)`` with 4 logits and 1 value per sample.
    """

    def __init__(self):
        super().__init__()
        self.features_extractor = Extractor(kernel_size=(3, 3))
        self.mlp_extractor = MlpExtractor()
        # Output heads: one logit per move, and a scalar state value.
        self.action_net = nn.Linear(512, 4)
        self.value_net = nn.Linear(512, 1)

    def forward(self, x):
        """Run the full network on a batch of observation planes."""
        features = self.features_extractor(x)
        latent_pi, latent_vf = self.mlp_extractor(features)
        return self.action_net(latent_pi), self.value_net(latent_vf)


# Placeholder assignment: a later notebook cell rewrites this file with a plain
# string replace, turning the right-hand side into the bytes literal of the
# base64-encoded weights. Keep the placeholder token out of every other line of
# this file, since the replace would expand each occurrence.
state_dict = _STATE_DICT_

# Reverse the encoding pipeline: base64 -> zlib -> pickle.
# NOTE(review): pickle.loads can execute arbitrary code; presumably acceptable
# here because the payload is produced from our own checkpoint -- confirm.
state_dict = pickle.loads(zlib.decompress(base64.b64decode(state_dict)))
model = ActorCriticPolicy()
model.load_state_dict(state_dict)
model.eval()  # switch the module to evaluation mode for inference

# State carried between agent() calls: the previous raw observation and the
# previous action number (both None until the first move has been made).
obs_prev = None
act_prev = None


# Modified from https://www.kaggle.com/yuricat/smart-geese-trained-by-reinforcement-learning
def process_obs(obs):
    """Encode an observation as a wrap-padded ``(9, 13, 11)`` uint8 array of 0/255 planes.

    Plane layout along the last axis:
        0-3  goose heads, indexed relative to the observing goose (0 = self)
        4-7  whole geese, same relative indexing
        8    head of the observing goose in the previous observation
        9    food
        10   cells not occupied by any goose

    The 7x11 board is padded by one cell on every side with values taken from
    the opposite edge. Reads the module-level ``obs_prev`` without modifying it.
    """
    global obs_prev
    n_rows, n_cols, n_planes = 7, 11, 11
    prev_head_plane, food_plane, empty_plane = 8, 9, 10
    me = obs.index

    planes = np.zeros((n_rows * n_cols, n_planes), dtype=np.uint8)
    planes[:, empty_plane] = 255

    for goose_id, cells in enumerate(obs.geese):
        if len(cells) == 0:
            continue  # an empty position list contributes nothing
        cells = list(cells)
        rel = (goose_id - me) % 4
        planes[cells[0], rel] = 255
        planes[cells, 4 + rel] = 255
        planes[cells, empty_plane] = 0

    if obs_prev is not None:
        earlier = obs_prev.geese[me]
        if len(earlier) > 0:
            planes[earlier[0], prev_head_plane] = 255

    food = list(obs.food)
    if food:
        planes[food, food_plane] = 255

    grid = planes.reshape(n_rows, n_cols, -1)
    # Wrap-around padding of the two board axes only; the plane axis is untouched.
    return np.pad(grid, ((1, 1), (1, 1), (0, 0)), mode="wrap")


def agent(obs, conf):
    """Entry point called once per turn: pick the next move greedily from the policy.

    Args:
        obs: current observation; passed to ``process_obs`` and remembered as
            ``obs_prev`` for the next call.
        conf: environment configuration (unused).

    Returns:
        The name of the chosen ``Action`` member.

    The move that reverses the previous action is never selected. The
    module-level ``obs_prev`` and ``act_prev`` are updated on every call.
    """
    global model, obs_prev, act_prev

    # Drop state left over from an earlier episode when this module is reused.
    # NOTE(review): assumes the observation exposes a `step` counter that is 0 on
    # the first turn; if the attribute is absent this check is simply skipped.
    if getattr(obs, 'step', None) == 0:
        obs_prev = None
        act_prev = None

    planes = process_obs(obs) / 255.  # normalize
    batch = np.expand_dims(planes.transpose(2, 0, 1), 0)
    batch = torch.from_numpy(batch.astype(np.float32))

    # Pure inference: skip autograd bookkeeping.
    with torch.no_grad():
        logits, _ = model(batch)
    logits = logits.squeeze()

    if act_prev is not None:
        # Actions are numbered 1..4; this maps an action to its opposite.
        act_oppo = (act_prev + 1) % 4 + 1
        # Mask on the logits rather than on the probabilities: zeroing a
        # saturated softmax output and renormalising would divide 0 by 0.
        logits[act_oppo - 1] = float('-inf')

    p = F.softmax(logits, dim=0)
    action = p.argmax().item() + 1

    obs_prev = obs
    act_prev = action
    return Action(action).name

Overwriting submission.py


In [2]:
import os
import glob
import base64
import pickle
import zlib
from stable_baselines3 import PPO
from kaggle_environments import make

# Pick the most recently modified checkpoint in the model directory.
path = 'models3'
list_of_files = glob.glob(os.path.join(path, '*.zip'))
if not list_of_files:
    raise FileNotFoundError(f"no '*.zip' checkpoints found in {path!r}")
model_path = max(list_of_files, key=os.path.getmtime)
print(model_path)
model = PPO.load(model_path)
print(model.policy)

# Serialise the policy weights: pickle -> zlib -> base64 (bytes).
state_dict = model.policy.to('cpu').state_dict()
state_dict = base64.b64encode(zlib.compress(pickle.dumps(state_dict)))

# Inject the weights into the submission file in place of the placeholder.
placeholder = "_STATE_DICT_"
with open('submission.py', 'r') as file:
    src = file.read()
if placeholder not in src:
    # Happens when this cell is re-run without regenerating submission.py first;
    # the replace below would silently leave the previously embedded weights.
    raise RuntimeError(
        f"{placeholder} not found in submission.py; re-run the %%writefile cell first"
    )
# repr() of the bytes object yields a b'...' literal, i.e. valid Python source.
src = src.replace(placeholder, repr(state_dict))
with open('submission.py', 'w') as file:
    file.write(src)

Loading environment football failed: No module named 'gfootball'
models3/model_4070000_steps.zip
ActorCriticPolicy(
  (features_extractor): Extractor(
    (conv1): Conv2d(11, 32, kernel_size=(3, 3), stride=(1, 1))
    (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
    (flatten): Flatten(start_dim=1, end_dim=-1)
    (fc3): Linear(in_features=2880, out_features=512, bias=True)
  )
  (mlp_extractor): MlpExtractor(
    (shared_net): Sequential(
      (0): Linear(in_features=512, out_features=512, bias=True)
      (1): ReLU()
    )
    (policy_net): Sequential(
      (0): Linear(in_features=512, out_features=512, bias=True)
      (1): ReLU()
    )
    (value_net): Sequential(
      (0): Linear(in_features=512, out_features=512, bias=True)
      (1): ReLU()
    )
  )
  (action_net): Linear(in_features=512, out_features=4, bias=True)
  (value_net): Linear(in_features=512, out_features=1, bias=True)
)


In [22]:
# Play one local episode: three example agents against the generated submission.
env = make("hungry_geese", debug=False)
env.run([
    "examples/risk_averse.py",
    "examples/mighty_boiler_goose.py",
    "examples/simple_bfs.py",
    "submission.py",
])
env.render(mode="ipython", width=700, height=550)
# white, blue, green, red  (presumably the render colours in agent order -- confirm)

In [5]:
# Upload submission.py to the hungry-geese competition with the Kaggle CLI; -m sets the submission message.
!kaggle competitions submit -c hungry-geese -f submission.py -m "PPO MlpPolicy(ReLU) self-play"

100%|██████████████████████████████████████| 10.8M/10.8M [00:06<00:00, 1.65MB/s]
Successfully submitted to Hungry Geese