# About

This notebook is for testing and commentary.

For each model, there's a .py file which was used for its training.

In [1]:
import os
import random
import numpy as np
%matplotlib inline
from kaggle_environments import make, evaluate
from stable_baselines3 import PPO

Loading environment football failed: No module named 'gfootball'


In [2]:
MODEL_DIR = os.path.join(".","models")

# Utility functions

In [3]:
# divides board into 3 channels - https://www.kaggle.com/c/connectx/discussion/168246
# first channel: player 1 pieces
# second channel: player 2 pieces
# third channel: possible moves. 1 for player_1 and -1 for player_2
def transform_board(board, mark):
    rows = board[0].shape[0]
    columns = board[0].shape[1]

    layer1 = board[0].copy()
    for c in range(0, columns):
        for r in range(rows - 1, -1, -1):
            value = layer1[r, c]
            if value == 1:
                layer1[r, c] = 1
            else:
                layer1[r, c] = 0

    layer2 = board[0].copy()
    for c in range(0, columns):
        for r in range(rows - 1, -1, -1):
            value = layer2[r, c]
            if value == 2:
                layer2[r, c] = 1
            else:
                layer2[r, c] = 0

    layer3 = board[0].copy()
    for c in range(0, columns):
        for r in range(rows - 1, -1, -1):
            value = layer3[r, c]
            if value == 0:
                if (mark == 1):
                    layer3[r, c] = 1
                else:
                    layer3[r, c] = -1
                break
            else:
                layer3[r, c] = 0

    board = np.array([[layer1, layer2, layer3]])
    return board

def get_win_percentages(agent1, agent2, n_rounds=100):
    # Use default Connect Four setup
    config = {'rows': 6, 'columns': 7, 'inarow': 4}
    # Agent 1 goes first (roughly) half the time          
    outcomes = evaluate("connectx", [agent1, agent2], config, [], n_rounds//2)
    # Agent 2 goes first (roughly) half the time      
    outcomes += [[b,a] for [a,b] in evaluate("connectx", [agent2, agent1], config, [], n_rounds-n_rounds//2)]
    print("Agent 1 Win Percentage:", np.round(outcomes.count([1,-1])/len(outcomes), 2))
    print("Agent 2 Win Percentage:", np.round(outcomes.count([-1,1])/len(outcomes), 2))
    print("Number of Invalid Plays by Agent 1:", outcomes.count([None, 0]))
    print("Number of Invalid Plays by Agent 2:", outcomes.count([0, None]))
    
def agent(obs, config):
    board_2d = np.array(obs['board']).reshape(1,6,7)
    board_3c = transform_board(board_2d, obs.mark)
    col, _ = model.predict(board_3c, deterministic=True)
    return int(col)
    # Check if selected column is valid
    is_valid = (obs['board'][int(col)] == 0)
    # If not valid, select random move. 
    if is_valid:
        return int(col)
    else:
        return random.choice([col for col in range(config.columns) if obs.board[int(col)] == 0])

### PPO CNN vs random

90-95 win rate after ~250k steps

In [5]:
model = PPO.load(os.path.join(MODEL_DIR, 'ppo_cnn_vs_random'))
get_win_percentages(agent1=agent, agent2="random", n_rounds=1000)

Agent 1 Win Percentage: 0.96
Agent 2 Win Percentage: 0.04
Number of Invalid Plays by Agent 1: 5
Number of Invalid Plays by Agent 2: 0


### PPO CNN self-play

It gets stuck at 80-85 win rate vs random and doesn't improve with more steps(I let it run for a few million).

Average episode length oscillates between 5-8 steps.

Not sure what's wrong.

In [6]:
model = PPO.load(os.path.join(MODEL_DIR, 'ppo_cnn_self_play'))
get_win_percentages(agent1=agent, agent2="random", n_rounds=1000)

Agent 1 Win Percentage: 0.86
Agent 2 Win Percentage: 0.08
Number of Invalid Plays by Agent 1: 56
Number of Invalid Plays by Agent 2: 0
