# Testing Updates

In [1]:
%load_ext autoreload
%autoreload 2

from functools import partial
from collections import defaultdict
import pickle

from wordle.wordle import Wordle
from environment.environment import Environment, StateYesNo
from environment.action import ActionVocabulary, ActionLetters, ActionCombLetters, ActionWagons
from dqn.agent import Agent
from dqn.train import exp_with_action, train_test_split, get_dordle_data, play_dordle
from replay_buffer.cpprb import PrioritizedReplayBuffer, ReplayBuffer

import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style='whitegrid')
import torch
import numpy as np
# Seed NumPy's global RNG so the np.random.shuffle of `indices` in the data
# cell is repeatable.
# NOTE(review): only NumPy is seeded in this cell; torch's RNG is not.
np.random.seed(0)

# Bare expression: the cell displays the device string that would be chosen.
# The value is not bound to any name here.
"cuda:0" if torch.cuda.is_available() else "cpu"

'cuda:0'

## Data & Train Configuration

In [2]:
# Load the answer list and the guess list from their text files.
answers = Wordle._load_vocabulary('wordle/answers.txt', astype=list)
guesses = Wordle._load_vocabulary('wordle/guesses.txt', astype=list)
wordle_list = guesses  # alias used when building the action encodings below

# Positions in `guesses` whose word also appears in `answers`.
# Membership is tested against a set so the scan is linear in len(guesses)
# instead of len(guesses) * len(answers) with list lookups.
answer_set = set(answers)
in_answers = [i for i, word in enumerate(guesses) if word in answer_set]

print(len(answers), len(guesses))

# Shuffled positions into `guesses`; uses NumPy's global RNG (seeded above).
indices = np.arange(len(guesses))
np.random.shuffle(indices)

2315 12972


In [3]:
def agent_path(nickname, t):
    """Build the file paths used for one agent snapshot.

    Args:
        nickname: Directory-like prefix placed before every file name.
        t: Tag interpolated into each file name after the artifact kind.

    Returns:
        dict with keys 'local', 'target', 'buffer' and 'optimizer', each
        mapped to '<nickname>/<kind>-<t>.<ext>'.
    """
    paths = {}
    paths['local'] = f'{nickname}/local-{t}.pth'
    paths['target'] = f'{nickname}/target-{t}.pth'
    paths['buffer'] = f'{nickname}/buffer-{t}.npz'
    paths['optimizer'] = f'{nickname}/optimizer-{t}.pth'
    return paths

In [4]:
def get_data(n_guesses, overfit, augment_prob_word=None, augment_prob_letter=None):
    """Call ``train_test_split`` with the notebook-level word data filled in.

    The caller's arguments are combined with the globals ``guesses``,
    ``indices`` and ``in_answers`` and passed positionally; whatever
    ``train_test_split`` returns is handed back unchanged.
    """
    split_args = (
        n_guesses,
        overfit,
        guesses,
        indices,
        in_answers,
        augment_prob_word,
        augment_prob_letter,
    )
    return train_test_split(*split_args)

In [5]:
# 50-guess overfit split with both augmentation probabilities set.
train_answers_cur, test_answers_cur, guesses_cur, aug_words = get_data(
    n_guesses=50,
    overfit=True,
    augment_prob_word=0.5,
    augment_prob_letter=0.2,
)

guesses: 51
train answers: 8
test answers: 7 (overfit strategy)
augmented: 1


In [6]:
# Inspect the training answers returned by the 50-guess split above.
print(train_answers_cur)

['caulk', 'savvy', 'truce', 'aware', 'cheer', 'tally', 'liner', 'litfp']


In [7]:
# Show the test answers next to the augmented words from the same split.
print(test_answers_cur, aug_words)

['caulk', 'savvy', 'truce', 'aware', 'cheer', 'tally', 'liner'] ['litfp']


## Train Vocabulary

In [None]:
# Reward table handed to the training helper.
# NOTE(review): 'B'/'Y'/'G' presumably correspond to per-letter feedback
# colours - confirm against the environment code.
rewards = {
    'repeat': -0.1,
    'B': 1,
    'Y': 1,
    'G': 1,
    'win': 10,
    'lose': -10,
    'step': -5,
}

In [59]:
def train(**kwargs):
    """Run ``exp_with_action`` for the 'vocabulary' action type.

    The notebook-level ``rewards`` dict is looked up at call time; every
    keyword argument is forwarded unchanged and the helper's return value
    is passed straight back.
    """
    action_type = 'vocabulary'
    return exp_with_action(action_type, rewards, **kwargs)

In [60]:
# 100-guess split using the overfit strategy (no augmentation arguments).
n_guesses = 100
data = get_data(n_guesses=n_guesses, overfit=True)

guesses: 100
train answers: 15
test answers: 15 (overfit strategy)
augmented: 0


In [61]:
# Short run of the 'vocabulary' wrapper on the current `data` split.
train(
    data=data,
    # episode counts
    n_episodes=1000, n_episodes_warm=100,
    # eps_* schedule
    eps_start=1, eps_end=0.01, eps_decay=0.99954,
    # alpha, replay-buffer size and run label
    alpha=0, rb_size=int(1e5), method_name='testbed',
    # n_envs / optimize_interval
    n_envs=8, optimize_interval=8,
    agent_path=None,
)

Initial Test. 	Win Rate: 0.00%	Mean Steps: 6.0000


TRAIN EPISODES:   0%|          | 0/1000 [00:00<?, ?it/s]


Episodes:  125	Time: 0 s	Agent Eps: 0.96	Train Win Rate: 9.60%	Test Win Rate: 6.67%	Test Mean Steps: 5.6667

Episodes:  253	Time: 1 s	Agent Eps: 0.92	Train Win Rate: 4.69%	Test Win Rate: 13.33%	Test Mean Steps: 5.4000

Episodes:  378	Time: 1 s	Agent Eps: 0.88	Train Win Rate: 4.80%	Test Win Rate: 26.67%	Test Mean Steps: 5.1333

Episodes:  503	Time: 2 s	Agent Eps: 0.84	Train Win Rate: 10.40%	Test Win Rate: 13.33%	Test Mean Steps: 5.4667

Episodes:  629	Time: 2 s	Agent Eps: 0.81	Train Win Rate: 14.29%	Test Win Rate: 26.67%	Test Mean Steps: 5.1333

Episodes:  754	Time: 3 s	Agent Eps: 0.77	Train Win Rate: 17.60%	Test Win Rate: 20.00%	Test Mean Steps: 5.4000

Episodes:  880	Time: 3 s	Agent Eps: 0.74	Train Win Rate: 19.05%	Test Win Rate: 40.00%	Test Mean Steps: 4.6667

Episodes: 1005	Time: 3 s	Agent Eps: 0.71	Train Win Rate: 20.80%	Test Win Rate: 60.00%	Test Mean Steps: 4.2000


'testbed-15-100'

## Train Letters

In [None]:
# Encoding matrix for the letters action type, built over the full guess list.
ohe_matrix = ActionLetters._make_ohe(
    vocabulary=wordle_list,
)

# Reward table handed to the training helper (same values as the other sections).
rewards = {
    'repeat': -0.1,
    'B': 1,
    'Y': 1,
    'G': 1,
    'win': 10,
    'lose': -10,
    'step': -5,
}

In [62]:
def train(**kwargs):
    """Run ``exp_with_action`` for the 'letters' action type.

    The notebook-level ``rewards``, ``ohe_matrix`` and ``wordle_list`` are
    looked up at call time; remaining keyword arguments are forwarded
    unchanged and the helper's return value is passed straight back.
    """
    action_type = 'letters'
    return exp_with_action(
        action_type,
        rewards,
        ohe_matrix=ohe_matrix,
        wordle_list=wordle_list,
        **kwargs,
    )

In [63]:
# 100-guess split using the overfit strategy (no augmentation arguments).
n_guesses = 100
data = get_data(n_guesses=n_guesses, overfit=True)

guesses: 100
train answers: 15
test answers: 15 (overfit strategy)
augmented: 0


In [64]:
# Short run of the 'letters' wrapper on the current `data` split.
train(
    data=data,
    # episode counts
    n_episodes=1000, n_episodes_warm=100,
    # eps_* schedule
    eps_start=1, eps_end=0.01, eps_decay=0.99954,
    # alpha, replay-buffer size and run label
    alpha=0.4, rb_size=int(1e5), method_name='test_aug',
    # n_envs / optimize_interval
    n_envs=8, optimize_interval=8,
    agent_path=None,
)

Initial Test. 	Win Rate: 0.00%	Mean Steps: 6.0000


TRAIN EPISODES:   0%|          | 0/1000 [00:00<?, ?it/s]


Episodes:  125	Time: 0 s	Agent Eps: 0.96	Train Win Rate: 8.00%	Test Win Rate: 0.00%	Test Mean Steps: 6.0000

Episodes:  250	Time: 1 s	Agent Eps: 0.92	Train Win Rate: 5.60%	Test Win Rate: 13.33%	Test Mean Steps: 5.5333

Episodes:  376	Time: 1 s	Agent Eps: 0.88	Train Win Rate: 10.32%	Test Win Rate: 20.00%	Test Mean Steps: 5.2667

Episodes:  501	Time: 2 s	Agent Eps: 0.84	Train Win Rate: 22.40%	Test Win Rate: 53.33%	Test Mean Steps: 4.6667

Episodes:  626	Time: 2 s	Agent Eps: 0.81	Train Win Rate: 15.20%	Test Win Rate: 40.00%	Test Mean Steps: 4.8000

Episodes:  751	Time: 3 s	Agent Eps: 0.78	Train Win Rate: 18.40%	Test Win Rate: 40.00%	Test Mean Steps: 4.3333

Episodes:  876	Time: 3 s	Agent Eps: 0.75	Train Win Rate: 40.00%	Test Win Rate: 66.67%	Test Mean Steps: 3.8000

Episodes: 1003	Time: 4 s	Agent Eps: 0.72	Train Win Rate: 37.80%	Test Win Rate: 73.33%	Test Mean Steps: 3.7333


'test_aug-15-100'

## Train Comb Letters

### $k=1$

In [72]:
# Encoding matrix for the combined-letters action type with k=1,
# built over the full guess list.
ohe_matrix = ActionCombLetters._make_ohe(
    vocabulary=wordle_list,
    k=1,
)

# Reward table handed to the training helper (same values as the other sections).
rewards = {
    'repeat': -0.1,
    'B': 1,
    'Y': 1,
    'G': 1,
    'win': 10,
    'lose': -10,
    'step': -5,
}

print(ohe_matrix.shape)

torch.Size([130, 12972]) torch.Size([4782, 12972])


In [73]:
def train(**kwargs):
    """Run ``exp_with_action`` for the 'comb_letters' action type with k=1.

    The notebook-level ``rewards``, ``ohe_matrix`` and ``wordle_list`` are
    looked up at call time; remaining keyword arguments are forwarded
    unchanged and the helper's return value is passed straight back.
    """
    action_type = 'comb_letters'
    return exp_with_action(
        action_type,
        rewards,
        ohe_matrix=ohe_matrix,
        k=1,
        wordle_list=wordle_list,
        **kwargs,
    )

In [74]:
# 100-guess split using the overfit strategy (no augmentation arguments).
n_guesses = 100
data = get_data(n_guesses=n_guesses, overfit=True)

guesses: 100
train answers: 15
test answers: 15 (overfit strategy)
augmented: 0


In [75]:
# Short run of the 'comb_letters' (k=1) wrapper on the current `data` split.
train(
    data=data,
    # episode counts
    n_episodes=1000, n_episodes_warm=10,
    # eps_* schedule
    eps_start=1, eps_end=0.01, eps_decay=0.999,
    # alpha, replay-buffer size and run label
    alpha=0.4, rb_size=int(1e6), method_name='testbed',
    # n_envs / optimize_interval
    n_envs=8, optimize_interval=8,
    agent_path=None,
)

Initial Test. 	Win Rate: 0.00%	Mean Steps: 6.0000


TRAIN EPISODES:   0%|          | 0/1000 [00:00<?, ?it/s]


Episodes:  125	Time: 0 s	Agent Eps: 0.91	Train Win Rate: 7.20%	Test Win Rate: 6.67%	Test Mean Steps: 5.6667

Episodes:  250	Time: 1 s	Agent Eps: 0.83	Train Win Rate: 12.00%	Test Win Rate: 20.00%	Test Mean Steps: 5.5333

Episodes:  375	Time: 1 s	Agent Eps: 0.76	Train Win Rate: 12.80%	Test Win Rate: 33.33%	Test Mean Steps: 4.8000

Episodes:  500	Time: 2 s	Agent Eps: 0.70	Train Win Rate: 18.40%	Test Win Rate: 40.00%	Test Mean Steps: 4.4667

Episodes:  625	Time: 2 s	Agent Eps: 0.64	Train Win Rate: 28.00%	Test Win Rate: 40.00%	Test Mean Steps: 4.6000

Episodes:  751	Time: 3 s	Agent Eps: 0.59	Train Win Rate: 35.71%	Test Win Rate: 60.00%	Test Mean Steps: 4.0667

Episodes:  877	Time: 3 s	Agent Eps: 0.54	Train Win Rate: 30.95%	Test Win Rate: 60.00%	Test Mean Steps: 4.4000

Episodes: 1005	Time: 3 s	Agent Eps: 0.50	Train Win Rate: 50.78%	Test Win Rate: 66.67%	Test Mean Steps: 3.6667


'testbed-15-100 (2)'

### $k=2$

In [None]:
# Encoding matrix for the combined-letters action type with k=2,
# built over the full guess list. Rebinds the notebook-level `ohe_matrix`.
ohe_matrix = ActionCombLetters._make_ohe(
    vocabulary=wordle_list,
    k=2,
)

# Reward table handed to the training helper (same values as the other sections).
rewards = {
    'repeat': -0.1,
    'B': 1,
    'Y': 1,
    'G': 1,
    'win': 10,
    'lose': -10,
    'step': -5,
}

print(ohe_matrix.shape)

In [76]:
def train(**kwargs):
    """Run ``exp_with_action`` for the 'comb_letters' action type with k=2.

    The notebook-level ``rewards``, ``ohe_matrix`` and ``wordle_list`` are
    looked up at call time; remaining keyword arguments are forwarded
    unchanged and the helper's return value is passed straight back.
    """
    action_type = 'comb_letters'
    return exp_with_action(
        action_type,
        rewards,
        ohe_matrix=ohe_matrix,
        k=2,
        wordle_list=wordle_list,
        **kwargs,
    )

In [78]:
# Short run of the 'comb_letters' (k=2) wrapper.
# `data` is not rebuilt in this section; the existing notebook-level value is reused.
train(
    data=data,
    # episode counts
    n_episodes=1000, n_episodes_warm=10,
    # eps_* schedule
    eps_start=1, eps_end=0.01, eps_decay=0.99954,
    # alpha, replay-buffer size and run label
    alpha=0.4, rb_size=int(1e6), method_name='testbed',
    # n_envs / optimize_interval
    n_envs=8, optimize_interval=8,
    agent_path=None,
)

Initial Test. 	Win Rate: 13.33%	Mean Steps: 5.4000


TRAIN EPISODES:   0%|          | 0/1000 [00:00<?, ?it/s]


Episodes:  128	Time: 1 s	Agent Eps: 0.96	Train Win Rate: 3.12%	Test Win Rate: 6.67%	Test Mean Steps: 5.7333

Episodes:  253	Time: 1 s	Agent Eps: 0.92	Train Win Rate: 8.80%	Test Win Rate: 20.00%	Test Mean Steps: 5.2000

Episodes:  378	Time: 1 s	Agent Eps: 0.88	Train Win Rate: 12.00%	Test Win Rate: 20.00%	Test Mean Steps: 5.2667

Episodes:  504	Time: 2 s	Agent Eps: 0.84	Train Win Rate: 19.05%	Test Win Rate: 20.00%	Test Mean Steps: 5.1333

Episodes:  629	Time: 2 s	Agent Eps: 0.81	Train Win Rate: 22.40%	Test Win Rate: 66.67%	Test Mean Steps: 4.2000

Episodes:  754	Time: 3 s	Agent Eps: 0.78	Train Win Rate: 35.20%	Test Win Rate: 66.67%	Test Mean Steps: 3.9333

Episodes:  880	Time: 3 s	Agent Eps: 0.75	Train Win Rate: 32.54%	Test Win Rate: 66.67%	Test Mean Steps: 3.9333

Episodes: 1007	Time: 4 s	Agent Eps: 0.72	Train Win Rate: 49.61%	Test Win Rate: 73.33%	Test Mean Steps: 3.7333


'testbed-15-100 (4)'

## Train Wagons

### $k=1$

In [36]:
# Encoding matrix for the wagons action type with k=1,
# built over the full guess list. Rebinds the notebook-level `ohe_matrix`.
ohe_matrix = ActionWagons._make_ohe(
    vocabulary=wordle_list,
    k=1,
)

# Reward table handed to the training helper (same values as the other sections).
rewards = {
    'repeat': -0.1,
    'B': 1,
    'Y': 1,
    'G': 1,
    'win': 10,
    'lose': -10,
    'step': -5,
}

print(ohe_matrix.shape)

torch.Size([130, 12972]) torch.Size([1155, 12972])


In [79]:
def train(**kwargs):
    """Run ``exp_with_action`` for the 'wagons' action type with k=1.

    The notebook-level ``rewards``, ``ohe_matrix`` and ``wordle_list`` are
    looked up at call time; remaining keyword arguments are forwarded
    unchanged and the helper's return value is passed straight back.
    """
    action_type = 'wagons'
    return exp_with_action(
        action_type,
        rewards,
        ohe_matrix=ohe_matrix,
        k=1,
        wordle_list=wordle_list,
        **kwargs,
    )

In [80]:
# 100-guess split using the overfit strategy (no augmentation arguments).
n_guesses = 100
data = get_data(n_guesses=n_guesses, overfit=True)

guesses: 100
train answers: 15
test answers: 15 (overfit strategy)
augmented: 0


In [82]:
# Short run of the 'wagons' (k=1) wrapper on the current `data` split.
train(
    data=data,
    # episode counts
    n_episodes=1000, n_episodes_warm=10,
    # eps_* schedule
    eps_start=1, eps_end=0.01, eps_decay=0.99964,
    # alpha, replay-buffer size and run label
    alpha=0.4, rb_size=int(1e6), method_name='testbed',
    # n_envs / optimize_interval
    n_envs=8, optimize_interval=8,
    agent_path=None,
)

Initial Test. 	Win Rate: 0.00%	Mean Steps: 6.0000


TRAIN EPISODES:   0%|          | 0/1000 [00:00<?, ?it/s]


Episodes:  128	Time: 0 s	Agent Eps: 0.97	Train Win Rate: 2.34%	Test Win Rate: 13.33%	Test Mean Steps: 5.4000

Episodes:  253	Time: 1 s	Agent Eps: 0.93	Train Win Rate: 8.80%	Test Win Rate: 6.67%	Test Mean Steps: 5.6667

Episodes:  378	Time: 1 s	Agent Eps: 0.90	Train Win Rate: 9.60%	Test Win Rate: 13.33%	Test Mean Steps: 5.4667

Episodes:  504	Time: 2 s	Agent Eps: 0.87	Train Win Rate: 13.49%	Test Win Rate: 46.67%	Test Mean Steps: 4.6667

Episodes:  630	Time: 2 s	Agent Eps: 0.84	Train Win Rate: 19.84%	Test Win Rate: 46.67%	Test Mean Steps: 4.8000

Episodes:  755	Time: 3 s	Agent Eps: 0.82	Train Win Rate: 20.80%	Test Win Rate: 66.67%	Test Mean Steps: 3.9333

Episodes:  882	Time: 3 s	Agent Eps: 0.79	Train Win Rate: 23.62%	Test Win Rate: 53.33%	Test Mean Steps: 4.2667

Episodes: 1008	Time: 3 s	Agent Eps: 0.77	Train Win Rate: 30.16%	Test Win Rate: 60.00%	Test Mean Steps: 4.0667


'testbed-15-100 (6)'

### $k=2$

In [None]:
# Encoding matrix for the wagons action type with k=2. It is stored under its
# own name (`ohe2`), so the k=1 `ohe_matrix` from the previous section is
# left untouched.
ohe2 = ActionWagons._make_ohe(vocabulary=wordle_list, k=2)

# Reward table handed to the training helper (same values as the other sections).
rewards = {'repeat':-0.1, 'B':1, 'Y':1, 'G':1, 'win':10, 'lose':-10, 'step':-5}

# Report the shape of the matrix built in THIS cell. The cell previously
# printed `ohe_matrix.shape`, i.e. the stale matrix from an earlier section.
print(ohe2.shape)

In [83]:
def train(**kwargs):
    """Run ``exp_with_action`` for the 'wagons' action type with k=2.

    Uses the notebook-level ``ohe2`` matrix (not ``ohe_matrix``) together
    with ``rewards`` and ``wordle_list``, all looked up at call time.
    Remaining keyword arguments are forwarded unchanged and the helper's
    return value is passed straight back.
    """
    action_type = 'wagons'
    return exp_with_action(
        action_type,
        rewards,
        ohe_matrix=ohe2,
        k=2,
        wordle_list=wordle_list,
        **kwargs,
    )

In [85]:
# Short run of the 'wagons' (k=2) wrapper.
# `data` is not rebuilt in this section; the existing notebook-level value is reused.
train(
    data=data,
    # episode counts
    n_episodes=1000, n_episodes_warm=10,
    # eps_* schedule
    eps_start=1, eps_end=0.01, eps_decay=0.999,
    # alpha, replay-buffer size and run label
    alpha=0.4, rb_size=int(1e6), method_name='testbed',
    # n_envs / optimize_interval
    n_envs=8, optimize_interval=8,
    agent_path=None,
)

Initial Test. 	Win Rate: 0.00%	Mean Steps: 6.0000


TRAIN EPISODES:   0%|          | 0/1000 [00:00<?, ?it/s]


Episodes:  125	Time: 1 s	Agent Eps: 0.91	Train Win Rate: 8.80%	Test Win Rate: 6.67%	Test Mean Steps: 5.7333

Episodes:  251	Time: 1 s	Agent Eps: 0.83	Train Win Rate: 11.11%	Test Win Rate: 33.33%	Test Mean Steps: 4.8000

Episodes:  376	Time: 1 s	Agent Eps: 0.76	Train Win Rate: 21.60%	Test Win Rate: 40.00%	Test Mean Steps: 4.7333

Episodes:  503	Time: 2 s	Agent Eps: 0.69	Train Win Rate: 22.83%	Test Win Rate: 40.00%	Test Mean Steps: 4.4000

Episodes:  628	Time: 2 s	Agent Eps: 0.64	Train Win Rate: 27.20%	Test Win Rate: 60.00%	Test Mean Steps: 3.7333

Episodes:  755	Time: 3 s	Agent Eps: 0.59	Train Win Rate: 49.61%	Test Win Rate: 46.67%	Test Mean Steps: 4.4667

Episodes:  880	Time: 3 s	Agent Eps: 0.55	Train Win Rate: 50.40%	Test Win Rate: 60.00%	Test Mean Steps: 4.0000

Episodes: 1008	Time: 4 s	Agent Eps: 0.50	Train Win Rate: 47.66%	Test Win Rate: 66.67%	Test Mean Steps: 3.8667


'testbed-15-100 (8)'

## Dordle

In [8]:
# Encoding matrix for the letters action type, built over the full guess list.
# Rebinds the notebook-level `ohe_matrix`.
ohe_matrix = ActionLetters._make_ohe(
    vocabulary=wordle_list,
)

# Reward table handed to the Dordle training helper (same values as the other sections).
rewards = {
    'repeat': -0.1,
    'B': 1,
    'Y': 1,
    'G': 1,
    'win': 10,
    'lose': -10,
    'step': -5,
}

print(ohe_matrix.shape)

torch.Size([130, 12972])


In [9]:
def get_data(n_guesses, n_boards):
    """Call ``get_dordle_data`` with the notebook-level word data filled in.

    ``n_guesses`` and ``n_boards`` are combined with the globals ``guesses``,
    ``indices`` and ``in_answers`` and passed positionally; the helper's
    return value is handed back unchanged.

    NOTE(review): this rebinds the name ``get_data`` that an earlier cell
    defined with the signature (n_guesses, overfit, ...). Once this cell has
    run, earlier cells that call ``get_data(..., overfit=True)`` will fail
    until their own definition cell is re-executed.
    """
    dordle_args = (n_guesses, n_boards, guesses, indices, in_answers)
    return get_dordle_data(*dordle_args)

def train(**kwargs):
    """Run ``play_dordle`` with the notebook-level configuration.

    ``rewards``, ``ohe_matrix`` and ``wordle_list`` are looked up at call
    time and passed positionally; keyword arguments are forwarded unchanged
    and the helper's return value is passed straight back.
    """
    fixed_args = (rewards, ohe_matrix, wordle_list)
    return play_dordle(*fixed_args, **kwargs)

In [10]:
# Build the Dordle data set: a 500-guess subset for two boards.
# `get_data` here is the Dordle wrapper around get_dordle_data defined above.
n_boards = 2
data = get_data(n_guesses=500, n_boards=n_boards)

guesses: 500
answers: 89


In [13]:
# FIXME: as recorded in this notebook, this call fails with
# "IndexError: list assignment index out of range". The failing statement is
# not in this cell (it is somewhere beneath play_dordle), so the call is left
# unchanged here to reproduce the error.
train(
    data=data,
    
    n_episodes=1000,
    n_episodes_warm=10,
    
    eps_start=1,
    eps_end=0.01,
    eps_decay=0.999,
    
    alpha=0.4,
    rb_size=int(1e6),
    method_name='dordle_testbed',

    n_envs=8,
    optimize_interval=8,

    agent_path=None
)

IndexError: list assignment index out of range

## More Letters