In [11]:
%%writefile submission.py
import base64
import pickle
import zlib
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions.categorical import Categorical
from kaggle_environments.envs.hungry_geese.hungry_geese import Action


class TorusConv2d(nn.Module):
    """2-D convolution over a board whose opposite edges are joined (a torus).

    The input is padded circularly by half the kernel size on each spatial
    axis and then passed through an unpadded ``nn.Conv2d``, so for odd kernel
    sizes the output keeps the input's height and width.

    Args:
        input_dim: number of input channels.
        output_dim: number of output channels.
        kernel_size: ``(height, width)`` pair for the convolution kernel.
        bn: when truthy, an ``nn.BatchNorm2d`` is applied after the convolution.
    """

    def __init__(self, input_dim, output_dim, kernel_size, bn):
        super().__init__()
        # Number of wrapped rows / columns added on each side of the board.
        self.edge_size = (kernel_size[0] // 2, kernel_size[1] // 2)
        self.conv = nn.Conv2d(input_dim, output_dim, kernel_size=kernel_size)
        self.bn = nn.BatchNorm2d(output_dim) if bn else None

    def forward(self, x):
        """Apply the wrap-around convolution to a 4-D ``(N, C, H, W)`` tensor."""
        pad_h, pad_w = self.edge_size
        # Only pad when there is something to pad: a size-1 kernel dimension
        # gives an edge of 0, and a slice such as ``x[..., -0:]`` would select
        # the whole axis rather than nothing.
        if pad_h or pad_w:
            # F.pad takes the padding for the last dimension first: (left, right, top, bottom).
            x = F.pad(x, (pad_w, pad_w, pad_h, pad_h), mode='circular')
        h = self.conv(x)
        return h if self.bn is None else self.bn(h)


class GeeseNet(nn.Module):
    """Residual stack of torus convolutions with a policy head and a value head.

    ``forward`` takes an ``(N, 18, H, W)`` tensor and returns ``(p, v)``:
    ``p`` holds 4 unnormalised action scores per sample and ``v`` a single
    value squashed into ``[-1, 1]`` by ``tanh``.
    """

    def __init__(self):
        super().__init__()
        num_blocks = 12
        channels = 32
        self.conv0 = TorusConv2d(18, channels, (3, 3), False)
        residual_convs = [
            TorusConv2d(channels, channels, (3, 3), False)
            for _ in range(num_blocks)
        ]
        self.blocks = nn.ModuleList(residual_convs)
        self.head_p = nn.Linear(channels, 4, bias=False)
        self.head_v = nn.Linear(2 * channels, 1, bias=False)

    def forward(self, x):
        h = F.relu_(self.conv0(x))
        for conv in self.blocks:
            # Residual connection around every convolution.
            h = F.relu_(h + conv(h))

        batch, channels = h.size(0), h.size(1)
        # Input plane 0 acts as a spatial mask: features are summed only where
        # that plane is non-zero.
        mask = x[:, :1]
        h_head = (h * mask).view(batch, channels, -1).sum(-1)
        # Board-wide average of the features.
        h_avg = h.view(batch, channels, -1).mean(-1)

        policy = self.head_p(h_head)
        value_in = torch.cat([h_head, h_avg], 1)
        value = torch.tanh(self.head_v(value_in))
        return policy, value


# Template slot: the name on the right-hand side is not defined in this file.
# A later notebook cell substitutes every occurrence of it, textually, with a
# bytes literal holding the base64-encoded, zlib-compressed pickle of the
# weights. Do not repeat that name anywhere else in this file (comments
# included) or it will be substituted there as well.
state_dict = _STATE_DICT_

# Reverse the encoding: base64 -> zlib -> pickle.
# NOTE(review): pickle.loads executes arbitrary code from its input; this is
# only acceptable because the payload is embedded by the notebook itself.
state_dict = pickle.loads(zlib.decompress(base64.b64decode(state_dict)))
model = GeeseNet()
model.load_state_dict(state_dict)
# Inference mode for the module (affects layers such as batch norm).
model.eval()

# Observation from the previous call to agent(); None until the first call.
obs_prev = None


# Modified from https://www.kaggle.com/yuricat/smart-geese-trained-by-reinforcement-learning
def process_obs(obs):
    """Encode a raw observation as the 18-plane board array fed to the network.

    Each plane is a 7x11 board. Geese are ordered relative to ``obs['index']``
    so that the observing goose always occupies the first slot of each group:

        0-3    head cell of each goose
        4-7    last cell (tail tip) of each goose
        8-11   every cell occupied by each goose
        12-15  head cell of each goose in the module-level ``obs_prev``
               (left at zero while ``obs_prev`` is None)
        16     food cells
        17     cells holding neither a goose nor food

    Args:
        obs: mapping with keys 'index', 'geese' (one list of cell indices per
            goose) and 'food' (list of cell indices).

    Returns:
        float32 array of shape (18, 7, 11).
    """
    global obs_prev
    me = obs['index']
    planes = np.zeros((18, 7 * 11), dtype=np.float32)
    planes[-1] = 1  # start with every cell marked empty

    for goose, pos_list in enumerate(obs['geese']):
        cells = list(pos_list)
        if not cells:
            # A goose with no cells contributes nothing to any plane.
            continue
        rel = (goose - me) % 4
        planes[rel, cells[0]] = 1        # head
        planes[4 + rel, cells[-1]] = 1   # tip
        planes[8 + rel, cells] = 1       # whole body
        planes[-1, cells] = 0            # occupied, so not empty

    # Heads from the previous observation, if one has been recorded.
    if obs_prev is not None:
        for goose, pos_list in enumerate(obs_prev['geese']):
            if len(pos_list) > 0:
                planes[12 + (goose - me) % 4, pos_list[0]] = 1

    food = list(obs['food'])
    if food:
        planes[16, food] = 1
        planes[-1, food] = 0

    return planes.reshape(-1, 7, 11)


def agent(obs, conf):
    """Entry point called once per step: choose a move for this goose.

    Args:
        obs: observation mapping; must provide what ``process_obs`` reads
            ('index', 'geese', 'food'). A 'step' entry, when present, is used
            to detect the start of a new episode.
        conf: environment configuration (unused).

    Returns:
        The ``name`` of the sampled ``Action`` member.
    """
    global obs_prev
    # Drop the remembered observation at the start of an episode so the
    # "previous head" planes are not filled from an earlier game.
    if 'step' in obs and obs['step'] == 0:
        obs_prev = None

    features = torch.from_numpy(process_obs(obs).reshape(1, -1, 7, 11))
    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        logits, _ = model(features)

    # Sample from the policy; the sampled index is 0-based, hence the +1.
    # NOTE(review): presumably Action values start at 1 -- confirm.
    action = Categorical(logits=logits).sample().item() + 1
    obs_prev = obs
    return Action(action).name

Overwriting submission.py


In [12]:
import base64
import pickle
import zlib
import os
import glob
import torch
from stable_baselines3 import PPO
from collections import OrderedDict

model_path = 'models1/model_1180000_steps'
print('Loading', model_path)
model = PPO.load(model_path)

# Keep only the parameters stored under this prefix and strip the prefix from
# their names.
# NOTE(review): presumably the policy's `mlp_extractor` is the same network
# class that submission.py instantiates -- confirm against the training code.
prefix = 'mlp_extractor.'
policy_state = model.policy.state_dict()
state_dict = OrderedDict(
    (key[len(prefix):], tensor)
    for key, tensor in policy_state.items()
    if key.startswith(prefix)
)
if not state_dict:
    raise RuntimeError(f"no parameters with prefix {prefix!r} found in {model_path}")

# pickle -> zlib -> base64; submission.py applies the inverse on import.
state_dict = base64.b64encode(zlib.compress(pickle.dumps(state_dict)))

placeholder = "_STATE_DICT_"
with open('submission.py', 'r') as file:
    src = file.read()
# Once the placeholder has been consumed, str.replace would silently do
# nothing and the file would keep whatever weights were injected earlier.
if placeholder not in src:
    raise RuntimeError(
        f"{placeholder} not found in submission.py; "
        "re-run the %%writefile cell before exporting weights"
    )
# Formatting a bytes object yields its b'...' literal, which is valid Python
# source for the template assignment.
src = src.replace(placeholder, f"{state_dict}")
with open('submission.py', 'w') as file:
    file.write(src)

Loading models1/model_1180000_steps


In [13]:
from kaggle_environments import make

# debug=True makes the environment show agent internals on every step.
env = make("hungry_geese", debug=True)

# Self-play smoke test: the same submission file drives all four geese.
agents = ["submission.py"] * 4
env.run(agents)
# env.render(mode="ipython", width=800, height=700)

Goose Starved: Action.NORTH
Goose Starved: Action.NORTH
Goose Starved: Action.NORTH
Goose Starved: Action.NORTH


': 'NORTH',
   'reward': 1001,
   'info': {},
   'observation': {'remainingOverageTime': 60, 'index': 3},
   'status': 'ACTIVE'}],
 [{'action': 'NORTH',
   'reward': 1101,
   'info': {},
   'observation': {'remainingOverageTime': 60,
    'step': 10,
    'geese': [[27], [56], [46], [69]],
    'food': [32, 75],
    'index': 0},
   'status': 'ACTIVE'},
  {'action': 'NORTH',
   'reward': 1101,
   'info': {},
   'observation': {'remainingOverageTime': 60, 'index': 1},
   'status': 'ACTIVE'},
  {'action': 'NORTH',
   'reward': 1101,
   'info': {},
   'observation': {'remainingOverageTime': 60, 'index': 2},
   'status': 'ACTIVE'},
  {'action': 'NORTH',
   'reward': 1101,
   'info': {},
   'observation': {'remainingOverageTime': 60, 'index': 3},
   'status': 'ACTIVE'}],
 [{'action': 'NORTH',
   'reward': 1201,
   'info': {},
   'observation': {'remainingOverageTime': 60,
    'step': 11,
    'geese': [[16], [45], [35], [58]],
    'food': [32, 75],
    'index': 0},
   'status': 'ACTIVE'},
  {'ac

In [3]:
# Upload submission.py to the Hungry Geese competition via the Kaggle CLI.
# NOTE(review): the message says steps=360000, but the export cell in this
# notebook loads 'models1/model_1180000_steps' -- confirm the message matches
# the weights actually embedded in submission.py before submitting.
!kaggle competitions submit -c hungry-geese -f submission.py -m "PPO steps=360000 lr=1e-5 only self-play"

100%|█████████████████████████████████████████| 572k/572k [00:04<00:00, 135kB/s]
Successfully submitted to Hungry Geese