# Model Complexity

In [1]:
%load_ext autoreload
%autoreload 2

from functools import partial
from collections import defaultdict
import pickle

from wordle.wordle import Wordle
from environment.environment import Environment, StateYesNo
from environment.action import ActionVocabulary, ActionLetters, ActionCombLetters, ActionWagons
from dqn.agent import Agent
from dqn.train import exp_with_action, train_test_split, get_dordle_data, play_dordle
from replay_buffer.cpprb import PrioritizedReplayBuffer, ReplayBuffer

import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style='whitegrid')
import torch
import numpy as np
# Seed every RNG this notebook can reach so a Restart & Run All repeats the
# same run. NumPy's global RNG drives the index shuffle further down.
np.random.seed(0)
# torch keeps its own generator; without this call it starts from a
# non-deterministic seed. NOTE(review): whether the dqn code samples from
# torch's RNG is not visible from this notebook -- confirm.
torch.manual_seed(0)

# Bare expression: shown as the cell output so the reader sees which device
# string is available. The value is not stored anywhere.
"cuda:0" if torch.cuda.is_available() else "cpu"

'cuda:0'

## Data & Train Configuration

In [2]:
# Read the two vocabularies from their text files.
answers = Wordle._load_vocabulary('wordle/answers.txt', astype=list)
guesses = Wordle._load_vocabulary('wordle/guesses.txt', astype=list)
# `wordle_list` is just another name for the guess list.
wordle_list = guesses

# Positions inside `guesses` of the words that also occur in `answers`.
in_answers = [
    position
    for position, candidate in enumerate(guesses)
    if candidate in answers
]

print(len(answers), len(guesses))

# One index per guess, shuffled in place with NumPy's global RNG.
indices = np.arange(len(guesses))
np.random.shuffle(indices)

2315 12972


In [3]:
def agent_path(nickname, t):
    """Return the file paths associated with `nickname` and `t`.

    The result is a dict with the keys 'local', 'target', 'buffer' and
    'optimizer' (in that order). Every path sits under the `nickname`
    directory and carries `t` in its file name; 'buffer' uses the `.npz`
    extension, the other three use `.pth`.
    """
    paths = {}
    paths['local'] = f'{nickname}/local-{t}.pth'
    paths['target'] = f'{nickname}/target-{t}.pth'
    paths['buffer'] = f'{nickname}/buffer-{t}.npz'
    paths['optimizer'] = f'{nickname}/optimizer-{t}.pth'
    return paths

In [4]:
def get_data(n_guesses, overfit):
    """Call `train_test_split` with the notebook-level word data.

    `n_guesses` and `overfit` are forwarded unchanged as the first two
    positional arguments; the globals `guesses`, `indices` and `in_answers`
    (looked up when the function is called) supply the remaining three.
    Returns whatever `train_test_split` returns.
    """
    split = train_test_split(n_guesses, overfit, guesses, indices, in_answers)
    return split

## Letters

In [8]:
# Matrix produced by ActionLetters._make_ohe for the full word list
# (presumably a one-hot encoding, going by the helper's name).
ohe_matrix = ActionLetters._make_ohe(vocabulary=wordle_list)
# Reward value per event key.
rewards = {
    'repeat': -0.1,
    'B': 0,
    'Y': 1,
    'G': 1,
    'win': 15,
    'lose': -15,
    'step': -5,
}
def train(**kwargs):
    """Run `exp_with_action` for the 'letters' action type.

    The globals `rewards`, `ohe_matrix` and `wordle_list` are read at call
    time, so they reflect whichever cell assigned them last. Any keyword
    arguments are forwarded unchanged, and the return value is whatever
    `exp_with_action` returns.
    """
    fixed = dict(ohe_matrix=ohe_matrix, wordle_list=wordle_list)
    return exp_with_action('letters', rewards, **fixed, **kwargs)

In [9]:
# Use the whole guess list, with overfit=True.
n_guesses = len(guesses)
data = get_data(n_guesses=n_guesses, overfit=True)

guesses: 12972
train answers: 2315
test answers: 2315 (overfit strategy)


In [11]:
# The call is the cell's last expression, so its return value is displayed.
train(
    data=data,

    # episode counts and logging cadence
    n_episodes=1_000_000,
    n_episodes_warm=100,
    logging_interval=30_000,

    # eps_* settings (presumably the exploration-rate schedule -- confirm in dqn.train)
    eps_start=1,
    eps_end=0.05,
    eps_decay=0.9954,

    alpha=0.4,
    rb_size=100_000,
    method_name='resnet-final',

    n_envs=8,
    optimize_interval=8,

    agent_path=None,
)



WARM EPISODES:   0%|          | 0/100 [00:00<?, ?it/s]

Initial Stats. 	Test Win Rate: 0.09%	Test Mean Steps: 5.9965

Test Games Distribution by Steps: 1: 0, 2: 2, 3: 0, 4: 0, 5: 0, 6: 2313
Test Games Distribution by Reward: [-45.0,-39.4): 2266, [-39.4,-33.8): 47, [-33.8,-28.3): 0, [-28.3,-22.7): 0, [-22.7,-17.1): 0, [-17.1,-11.5): 0, [-11.5,-5.9): 0, [-5.9,-0.4): 0, [-0.4,5.2): 0, [5.2,10.8): 2
Test Rewards Distribution by Type: repeat: 0.37%, B: 0.00%, Y: 3.17%, G: 1.81%, win: 0.03%, lose: 31.54%, step: 63.09%


TRAIN EPISODES:   0%|          | 0/1000000 [00:00<?, ?it/s]


Episodes: 30000	Time: 79 s	Agent Eps: 0.05	Train Win Rate: 0.32%	Test Win Rate: 0.95%	Test Mean Steps: 5.9832

Episodes: 60000	Time: 156 s	Agent Eps: 0.05	Train Win Rate: 3.53%	Test Win Rate: 10.84%	Test Mean Steps: 5.8281

Episodes: 90000	Time: 229 s	Agent Eps: 0.05	Train Win Rate: 27.35%	Test Win Rate: 48.68%	Test Mean Steps: 5.2773

Episodes: 120000	Time: 296 s	Agent Eps: 0.05	Train Win Rate: 57.21%	Test Win Rate: 69.29%	Test Mean Steps: 5.0397

Episodes: 150000	Time: 363 s	Agent Eps: 0.05	Train Win Rate: 69.58%	Test Win Rate: 72.92%	Test Mean Steps: 5.1127

Episodes: 180000	Time: 427 s	Agent Eps: 0.05	Train Win Rate: 74.08%	Test Win Rate: 73.78%	Test Mean Steps: 5.1119

Episodes: 210000	Time: 493 s	Agent Eps: 0.05	Train Win Rate: 76.37%	Test Win Rate: 78.79%	Test Mean Steps: 4.8566

Episodes: 240000	Time: 557 s	Agent Eps: 0.05	Train Win Rate: 78.62%	Test Win Rate: 83.80%	Test Mean Steps: 4.8557

Episodes: 270000	Time: 620 s	Agent Eps: 0.05	Train Win Rate: 80.87%	Test Win Rate: 86.

'resnet-final-2315-12972'

## Wagons

In [12]:
# Matrix produced by ActionWagons._make_ohe for the full word list with k=2.
ohe_matrix = ActionWagons._make_ohe(vocabulary=wordle_list, k=2)
# Reward value per event key.
rewards = {
    'repeat': -0.1,
    'B': 0,
    'Y': 1,
    'G': 1,
    'win': 15,
    'lose': -15,
    'step': -5,
}

print(ohe_matrix.shape)

torch.Size([1155, 12972])


In [13]:
def train(**kwargs):
    """Run `exp_with_action` for the 'wagons' action type with k=2.

    The globals `rewards`, `ohe_matrix` and `wordle_list` are read at call
    time, so they reflect whichever cell assigned them last. Any keyword
    arguments are forwarded unchanged, and the return value is whatever
    `exp_with_action` returns.
    """
    fixed = dict(ohe_matrix=ohe_matrix, k=2, wordle_list=wordle_list)
    return exp_with_action('wagons', rewards, **fixed, **kwargs)

In [14]:
# Use the whole guess list, with overfit=True.
n_guesses = len(guesses)
data = get_data(n_guesses=n_guesses, overfit=True)

guesses: 12972
train answers: 2315
test answers: 2315 (overfit strategy)


In [15]:
# The call is the cell's last expression, so its return value is displayed.
train(
    data=data,

    # episode counts and logging cadence
    n_episodes=1_000_000,
    n_episodes_warm=100,
    logging_interval=30_000,

    # eps_* settings (presumably the exploration-rate schedule -- confirm in dqn.train)
    eps_start=1,
    eps_end=0.05,
    eps_decay=0.9954,

    alpha=0.4,
    rb_size=1_000_000,
    method_name='wagons',

    n_envs=8,
    optimize_interval=8,

    agent_path=None,
)

WARM EPISODES:   0%|          | 0/100 [00:00<?, ?it/s]

Initial Stats. 	Test Win Rate: 0.00%	Test Mean Steps: 6.0000

Test Games Distribution by Steps: 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 2315
Test Games Distribution by Reward: [-45.0,-44.2): 27, [-44.2,-43.4): 112, [-43.4,-42.6): 448, [-42.6,-41.8): 654, [-41.8,-41.0): 171, [-41.0,-40.2): 428, [-40.2,-39.4): 278, [-39.4,-38.6): 150, [-38.6,-37.8): 41, [-37.8,-37.0): 6
Test Rewards Distribution by Type: repeat: 0.11%, B: 0.00%, Y: 4.98%, G: 2.18%, win: 0.00%, lose: 30.91%, step: 61.82%


TRAIN EPISODES:   0%|          | 0/1000000 [00:00<?, ?it/s]


Episodes: 30000	Time: 97 s	Agent Eps: 0.05	Train Win Rate: 0.06%	Test Win Rate: 0.13%	Test Mean Steps: 5.9965

Episodes: 60000	Time: 194 s	Agent Eps: 0.05	Train Win Rate: 0.23%	Test Win Rate: 0.35%	Test Mean Steps: 5.9922

Episodes: 90000	Time: 291 s	Agent Eps: 0.05	Train Win Rate: 0.42%	Test Win Rate: 0.39%	Test Mean Steps: 5.9914

Episodes: 120000	Time: 389 s	Agent Eps: 0.05	Train Win Rate: 0.52%	Test Win Rate: 0.73%	Test Mean Steps: 5.9844

Episodes: 150000	Time: 491 s	Agent Eps: 0.05	Train Win Rate: 0.61%	Test Win Rate: 0.86%	Test Mean Steps: 5.9840

Episodes: 180000	Time: 593 s	Agent Eps: 0.05	Train Win Rate: 0.79%	Test Win Rate: 0.73%	Test Mean Steps: 5.9883

Episodes: 210000	Time: 692 s	Agent Eps: 0.05	Train Win Rate: 0.95%	Test Win Rate: 0.69%	Test Mean Steps: 5.9836

Episodes: 240000	Time: 788 s	Agent Eps: 0.05	Train Win Rate: 1.04%	Test Win Rate: 1.60%	Test Mean Steps: 5.9633

Episodes: 270000	Time: 882 s	Agent Eps: 0.05	Train Win Rate: 1.20%	Test Win Rate: 2.03%	Test Mean S

'wagons-2315-12972'