In [11]:
%%writefile submission.py
import base64
import pickle
import zlib
import numpy as np
import torch
import torch.nn as nn
from torch.nn.modules.container import Sequential
# from torch.distributions.categorical import Categorical
from kaggle_environments.envs.hungry_geese.hungry_geese import Action


class FlattenExtractor(nn.Module):
    """Some Information about FlattenExtractor"""
    def __init__(self):
        super(FlattenExtractor, self).__init__()
        self.flatten = nn.Flatten(start_dim=1, end_dim=-1)

    def forward(self, x):
        x = self.flatten(x)
        return x


class MlpExtractor(nn.Module):
    """Some Information about MlpExtractor"""
    def __init__(self):
        super(MlpExtractor, self).__init__()
        self.shared_net = Sequential()
        self.policy_net = Sequential(
            nn.Linear(in_features=1386, out_features=64, bias=True),
            nn.Tanh(),
            nn.Linear(in_features=64, out_features=64, bias=True),
            nn.Tanh()
        )
        self.value_net = Sequential(
            nn.Linear(in_features=1386, out_features=64, bias=True),
            nn.Tanh(),
            nn.Linear(in_features=64, out_features=64, bias=True),
            nn.Tanh()
        )

    def forward(self, x):
        p = self.policy_net(x)
        v = self.value_net(x)
        return p, v


class ActorCriticPolicy(nn.Module):
    """Some Information about ActorCriticPolicy"""
    def __init__(self):
        super(ActorCriticPolicy, self).__init__()
        self.feature_extractor = FlattenExtractor()
        self.mlp_extractor = MlpExtractor()
        self.action_net = nn.Linear(in_features=64, out_features=4, bias=True)
        self.value_net = nn.Linear(in_features=64, out_features=1, bias=True)

    def forward(self, x):
        x = self.feature_extractor(x)
        p, v = self.mlp_extractor(x)
        p = self.action_net(p)
        v = self.value_net(v)
        return p, v


state_dict = _STATE_DICT_

state_dict = pickle.loads(zlib.decompress(base64.b64decode(state_dict)))
model = ActorCriticPolicy()
model.load_state_dict(state_dict)
model.eval()

obs_prev = None


# Modified from https://www.kaggle.com/yuricat/smart-geese-trained-by-reinforcement-learning
def process_obs(obs):
    global obs_prev
    b = np.zeros((18, 7 * 11), dtype=np.float32)
    b[-1] = 1  # empty cells

    for p, pos_list in enumerate(obs['geese']):
        # head position
        for pos in pos_list[:1]:
            b[0 + (p - obs['index']) % 4, pos] = 1
            b[-1, pos] = 0
        # tip position
        for pos in pos_list[-1:]:
            b[4 + (p - obs['index']) % 4, pos] = 1
            b[-1, pos] = 0
        # whole position
        for pos in pos_list:
            b[8 + (p - obs['index']) % 4, pos] = 1
            b[-1, pos] = 0

    # previous head position
    if obs_prev is not None:
        for p, pos_list in enumerate(obs_prev['geese']):
            for pos in pos_list[:1]:
                b[12 + (p - obs['index']) % 4, pos] = 1

    # food
    for pos in obs['food']:
        b[16, pos] = 1
        b[-1, pos] = 0

    return b.reshape(-1, 7, 11)


def agent(obs, conf):
    global model, obs_prev
    obs_backup = obs
    obs = process_obs(obs).reshape(1, -1, 7, 11)
    obs = torch.from_numpy(obs)
    p, v = model(obs)
    # print(p)
    action = p.squeeze().argmax().item() + 1
    # action = Categorical(logits=p).sample().item() + 1
    obs_prev = obs_backup
    return Action(action).name

Overwriting submission.py


In [12]:
import base64
import pickle
import zlib
from stable_baselines3 import PPO

model_path = 'models1/model_380000_steps'
model = PPO.load(model_path)

state_dict = model.policy.to('cpu').state_dict()
state_dict = base64.b64encode(zlib.compress(pickle.dumps(state_dict)))

with open('submission.py', 'r') as file:
    src = file.read()
src = src.replace("_STATE_DICT_", f"{state_dict}")
with open('submission.py', 'w') as file:
    file.write(src)

In [13]:
from kaggle_environments import make

env = make("hungry_geese", debug=True)

env.run(["submission.py", "submission.py", "submission.py", "submission.py"])
# env.render(mode="ipython", width=800, height=700)

Opposite action: (2, <Action.WEST: 4>, <Action.EAST: 2>)
Goose Collision: SOUTH
Opposite action: (3, <Action.EAST: 2>, <Action.WEST: 4>)


[[{'action': 'NORTH',
   'reward': 0,
   'info': {},
   'observation': {'remainingOverageTime': 60,
    'step': 0,
    'geese': [[54], [76], [9], [34]],
    'food': [29, 47],
    'index': 0},
   'status': 'ACTIVE'},
  {'action': 'NORTH',
   'reward': 0,
   'info': {},
   'observation': {'remainingOverageTime': 60, 'index': 1},
   'status': 'ACTIVE'},
  {'action': 'NORTH',
   'reward': 0,
   'info': {},
   'observation': {'remainingOverageTime': 60, 'index': 2},
   'status': 'ACTIVE'},
  {'action': 'NORTH',
   'reward': 0,
   'info': {},
   'observation': {'remainingOverageTime': 60, 'index': 3},
   'status': 'ACTIVE'}],
 [{'action': 'EAST',
   'reward': 201,
   'info': {},
   'observation': {'remainingOverageTime': 60,
    'step': 1,
    'geese': [[44], [66], [75], [35]],
    'food': [29, 47],
    'index': 0},
   'status': 'ACTIVE'},
  {'action': 'EAST',
   'reward': 201,
   'info': {},
   'observation': {'remainingOverageTime': 60, 'index': 1},
   'status': 'ACTIVE'},
  {'action': 'NO

In [14]:
!kaggle competitions submit -c hungry-geese -f submission.py -m "PPO MlpPolicy only self-play"

100%|█████████████████████████████████████████| 907k/907k [00:04<00:00, 188kB/s]
Successfully submitted to Hungry Geese