# Beating Wordle

In [1]:
from google.colab import drive
# Mount Google Drive at /content/drive so later cells can read from and write to
# the project folder stored there.
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
! cp -a -n /content/drive/MyDrive/wordle-rl/. /content/
! mkdir /content/drive/MyDrive/wordle-rl/tests
! pip install cpprb
! pip install annoy

mkdir: cannot create directory ‘/content/drive/MyDrive/wordle-rl/tests’: File exists
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting cpprb
  Downloading cpprb-10.7.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m63.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: cpprb
Successfully installed cpprb-10.7.1
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting annoy
  Downloading annoy-1.17.2.tar.gz (647 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m647.4/647.4 kB[0m [31m34.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: annoy
  Building wheel for annoy (setup.py) ... [?25l[?25hdone
  Created wheel for annoy: filename=annoy-1.17.2-cp39-cp39-linux_x86_6

In [5]:
%load_ext autoreload
%autoreload 2

from functools import partial
from collections import defaultdict
import pickle

from wordle.wordlenp import Wordle
from environment.environment import Environment, StateYesNo, StateVocabulary
from environment.action import ActionVocabulary, ActionLetters, ActionCombLetters
from dqn.agent import Agent
from dqn.train import Trainer
from replay_buffer.cpprb import PrioritizedReplayBuffer, ReplayBuffer

import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style='whitegrid')
import torch
import numpy as np
# Seed NumPy's global RNG. Only NumPy is seeded in this cell.
np.random.seed(0)

# Bare expression: shows which device string applies on this runtime.
# The value is displayed only; it is not stored in a variable here.
"cuda:0" if torch.cuda.is_available() else "cpu"

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


'cuda:0'

## Datasets

### Multi-stage data

In [6]:
import bisect


# Load both word lists: answers as a plain list, guesses as a NumPy array.
answers = Wordle._load_vocabulary('wordle/answers.txt', astype=list)
guesses = Wordle._load_vocabulary('wordle/guesses.txt', astype=np.array)
wordle_list = guesses.copy().tolist()

# Positions (indices into `guesses`) of the words that also occur in `answers`.
# The lookup is a binary search, so it is only correct when `answers` is sorted
# -- TODO confirm the ordering of wordle/answers.txt.
in_answers = [
    position
    for position, candidate in enumerate(guesses)
    if (slot := bisect.bisect_left(answers, candidate)) < len(answers)
    and answers[slot] == candidate
]

In [7]:
# Sanity check: (number of answer words, number of guess words).
len(answers), len(guesses)

(2315, 12972)

In [8]:
# Random ordering of all guess indices, drawn from NumPy's global RNG
# (seeded earlier in the notebook).
guesses_indices = np.random.permutation(len(guesses))

## LETS GO

In [9]:
# Build the encoding matrices once for k=1 and k=2 so that every agent created
# later in the notebook can reuse them instead of recomputing.
ohe1, ohe2 = (
    ActionCombLetters(vocabulary=wordle_list, k=comb_size).ohe_matrix
    for comb_size in (1, 2)
)
print(ohe1.shape, ohe2.shape)

# Reward table handed to every Environment created in this notebook.
hard_rewards = {
    'B': 0,
    'Y': 1,
    'G': 1,
    'win': 10,
    'lose': -10,
    'step': -5,
}

# Training results, indexed as tasks_results[problem_name][method_name].
tasks_results = defaultdict(dict)

torch.Size([130, 12972]) torch.Size([4782, 12972])


In [10]:
def test(answers, guesses, model_path=None, k=1):
    """Run the trainer's test routine for an agent on the given vocabulary.

    Args:
        answers: answer words handed to ``Wordle``.
        guesses: guess vocabulary handed to ``Wordle`` and to the action space.
        model_path: forwarded unchanged to ``Agent`` (the notebook passes a
            dict with ``'local'`` and ``'target'`` checkpoint paths).
        k: combination size for ``ActionCombLetters``; ``k == 1`` selects the
            precomputed ``ohe1`` matrix, any other value selects ``ohe2``.

    Returns:
        Whatever ``trainer.test(log_game=False, return_result=True)`` returns;
        the notebook unpacks it into three values.
    """
    game = Wordle(vocabulary=guesses, answers=answers)
    env = Environment(
        rewards=hard_rewards,
        wordle=game,
        state_instance=StateYesNo(),
    )
    state_size = env.state.size

    if k == 1:
        ohe_matrix = ohe1
    else:
        ohe_matrix = ohe2
    action_space = ActionCombLetters(
        k=k,
        vocabulary=guesses,
        ohe_matrix=ohe_matrix,
        wordle_list=wordle_list,
    )
    buffer = PrioritizedReplayBuffer(state_size=state_size, alpha=0)

    agent = Agent(
        state_size=state_size,
        action_instance=action_space,
        replay_buffer=buffer,
        model_path=model_path,
    )
    trainer = Trainer(env, agent, is_parallel=False)

    # Epsilon is forced to 0 before testing (presumably to disable random
    # exploration -- confirm against Agent).
    agent.eps = 0
    return trainer.test(log_game=False, return_result=True)

In [12]:
# Evaluate the saved checkpoint; the same file is used for both the 'local'
# and the 'target' entry. Only the first returned value is kept.
success, _, _ = test(
    answers,
    guesses,
    model_path=dict.fromkeys(('local', 'target'), 'check97_62.pth'),
)

In [13]:
# Indices where `success` equals 0, and the answer words at those indices.
mistakes = np.where(success == 0)[0]
hard_answers = np.asarray(answers)[mistakes].tolist()
print(len(hard_answers), hard_answers)

55 ['abode', 'adorn', 'aglow', 'alpha', 'amaze', 'anode', 'aphid', 'baggy', 'baker', 'datum', 'dilly', 'dimly', 'dizzy', 'dozen', 'field', 'filmy', 'gazer', 'hazel', 'homer', 'hound', 'hydro', 'idler', 'igloo', 'joker', 'krill', 'lapel', 'liver', 'madly', 'manly', 'might', 'milky', 'ninja', 'odder', 'offer', 'piggy', 'poker', 'polka', 'pudgy', 'pupal', 'rival', 'satin', 'shape', 'silky', 'silly', 'spade', 'spiny', 'spite', 'unwed', 'usher', 'viral', 'waver', 'waxen', 'wight', 'wimpy', 'yearn']


In [49]:
def experiment(
        answers, guesses, hard_answers, play_batch_size,
        optimize_interval, n_batches, n_batches_warm, k=1,
        eps_start=1, eps_end=0.05, eps_decay=0.9995, model_path=None,
        alpha=0.1, test_first=False, checkpoint_interval=None, lr=5e-5
):
    """Train an agent on a batch of environments, one of which uses ``hard_answers``.

    ``play_batch_size - 1`` environments draw from ``answers`` and one more
    draws from ``hard_answers``. The training result is stored in the
    module-level ``tasks_results`` under ``[problem_name]['multi-stage']``.

    Args:
        answers: answer words for the regular environments.
        guesses: guess vocabulary shared by all environments and the action space.
        hard_answers: answer words for the single extra environment.
        play_batch_size: total number of environments; also forwarded to ``Trainer``.
        optimize_interval: forwarded to ``Agent``.
        n_batches, n_batches_warm, checkpoint_interval: forwarded to ``Trainer``.
        k: combination size for ``ActionCombLetters``; ``k == 1`` selects
            ``ohe1``, any other value selects ``ohe2``.
        eps_start, eps_end, eps_decay, test_first: forwarded to ``trainer.train``.
        model_path: forwarded to ``Agent``.
        alpha: forwarded to ``PrioritizedReplayBuffer``.
        lr: learning rate of the Adam optimizer (previously hard-coded to 5e-5).

    Returns:
        The value of ``agent.dump_models(nickname=problem_name)``.
    """
    def _make_env(env_answers):
        # Every environment shares rewards, guess vocabulary and state type;
        # only the pool of answers differs.
        return Environment(
            rewards=hard_rewards,
            wordle=Wordle(vocabulary=guesses, answers=env_answers),
            state_instance=StateYesNo()
        )

    env_list = [_make_env(answers) for _ in range(play_batch_size - 1)]
    env_list.append(_make_env(hard_answers))

    # Read the state size from the list instead of a loop variable: with
    # play_batch_size == 1 the loop above never runs, and the old code raised
    # NameError on `env`.
    state_size = env_list[0].state.size

    agent = Agent(
        state_size=state_size,
        action_instance=ActionCombLetters(
            k=k, vocabulary=guesses,
            ohe_matrix=ohe1 if k == 1 else ohe2,
            wordle_list=wordle_list,
        ),
        replay_buffer=PrioritizedReplayBuffer(state_size=state_size, alpha=alpha),
        optimize_interval=optimize_interval,
        model_path=model_path,
        optimizer=partial(torch.optim.Adam, lr=lr),
    )

    trainer = Trainer(
        env_list, agent,
        play_batch_size=play_batch_size,
        n_batches=n_batches,
        n_batches_warm=n_batches_warm,
        checkpoint_interval=checkpoint_interval
    )

    problem_name = f'{len(answers)}-{len(guesses)}'
    method_name = 'multi-stage'

    res = trainer.train(
        eps_start=eps_start,
        eps_end=eps_end,
        eps_decay=eps_decay,
        nickname=f'{method_name}-{problem_name}',
        test_first=test_first
    )
    tasks_results[problem_name][method_name] = res

    return agent.dump_models(nickname=problem_name)

In [40]:
# Deletes every .txt file directly in the current working directory; files in
# subdirectories (e.g. wordle/) are not matched by this glob.
! rm *.txt

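Learning rate for this run: `lr=1e-4`. Note: the `experiment` cell shown above (In [49], executed after this run) uses `lr=5e-5` for this call, so confirm which value produced the recorded output.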

In [41]:
# Continue training from check97_62.pth (same file for 'local' and 'target')
# with 8 environments per batch, no warm-up batches, and epsilon fixed at 0.
model_path = experiment(
    answers,
    guesses,
    hard_answers,
    play_batch_size=8,
    optimize_interval=8,
    n_batches=50000,
    n_batches_warm=0,
    eps_start=0.0,
    eps_end=0.0,
    model_path=dict.fromkeys(('local', 'target'), 'check97_62.pth'),
    alpha=0.4,
    test_first=False,
)

WARM BATCHES: 0it [00:00, ?it/s]

TRAIN BATCHES:   0%|          | 0/50000 [00:00<?, ?it/s]


Batch 6250	Time: 173 s	Agent Eps: 0.00	Train Win Rate: 92.90%	Test Win Rate: 98.14%	Test Mean Steps: 3.79

Batch 12500	Time: 351 s	Agent Eps: 0.00	Train Win Rate: 92.91%	Test Win Rate: 98.40%	Test Mean Steps: 3.79

Batch 18750	Time: 530 s	Agent Eps: 0.00	Train Win Rate: 94.08%	Test Win Rate: 98.66%	Test Mean Steps: 3.79

Batch 25000	Time: 708 s	Agent Eps: 0.00	Train Win Rate: 93.58%	Test Win Rate: 98.83%	Test Mean Steps: 3.78

Batch 31250	Time: 886 s	Agent Eps: 0.00	Train Win Rate: 94.03%	Test Win Rate: 98.83%	Test Mean Steps: 3.78

Batch 37500	Time: 1065 s	Agent Eps: 0.00	Train Win Rate: 94.54%	Test Win Rate: 98.88%	Test Mean Steps: 3.78

Batch 43750	Time: 1244 s	Agent Eps: 0.00	Train Win Rate: 94.88%	Test Win Rate: 99.09%	Test Mean Steps: 3.79

Batch 50000	Time: 1423 s	Agent Eps: 0.00	Train Win Rate: 94.90%	Test Win Rate: 99.09%	Test Mean Steps: 3.79

Saving checkpoint... Saved to multi-stage-2315-12972-1.pth


In [42]:
# Copy every .txt and .pth file from the working directory into the Drive
# tests folder (presumably so the run's artifacts survive the Colab session).
! cp *.txt /content/drive/MyDrive/wordle-rl/tests
! cp *.pth /content/drive/MyDrive/wordle-rl/tests

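Learning rate for this run: `lr=1e-4`. Note: the `experiment` cell shown above (In [49], executed after this run) uses `lr=5e-5` for this call, so confirm which value produced the recorded output.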

In [45]:
# Continue training from check99_09.pth (same file for 'local' and 'target')
# with 16 environments per batch, epsilon fixed at 0, and a checkpoint
# interval of 6250 batches.
model_path = experiment(
    answers,
    guesses,
    hard_answers,
    play_batch_size=16,
    optimize_interval=16,
    n_batches=50000,
    n_batches_warm=0,
    eps_start=0.0,
    eps_end=0.0,
    model_path=dict.fromkeys(('local', 'target'), 'check99_09.pth'),
    alpha=0.4,
    test_first=False,
    checkpoint_interval=6250,
)

WARM BATCHES: 0it [00:00, ?it/s]

TRAIN BATCHES:   0%|          | 0/50000 [00:00<?, ?it/s]


Batch 6250	Time: 241 s	Agent Eps: 0.00	Train Win Rate: 97.22%	Test Win Rate: 99.01%	Test Mean Steps: 3.78

Saving checkpoint... Saved to multi-stage-2315-12972-1.pth

Batch 12500	Time: 486 s	Agent Eps: 0.00	Train Win Rate: 97.23%	Test Win Rate: 99.14%	Test Mean Steps: 3.79

Saving checkpoint... Saved to multi-stage-2315-12972-2.pth

Batch 18750	Time: 728 s	Agent Eps: 0.00	Train Win Rate: 97.58%	Test Win Rate: 99.18%	Test Mean Steps: 3.79

Saving checkpoint... Saved to multi-stage-2315-12972-3.pth

Batch 25000	Time: 967 s	Agent Eps: 0.00	Train Win Rate: 97.81%	Test Win Rate: 99.27%	Test Mean Steps: 3.79

Saving checkpoint... Saved to multi-stage-2315-12972-4.pth

Batch 31250	Time: 1208 s	Agent Eps: 0.00	Train Win Rate: 97.89%	Test Win Rate: 99.22%	Test Mean Steps: 3.79

Saving checkpoint... Saved to multi-stage-2315-12972-5.pth

Batch 37500	Time: 1446 s	Agent Eps: 0.00	Train Win Rate: 97.62%	Test Win Rate: 99.14%	Test Mean Steps: 3.79

Saving checkpoint... Saved to multi-stage-2315-129

In [48]:
# Copy every .txt and .pth file from the working directory into the Drive
# tests folder.
# NOTE(review): the recorded output shows no .txt files existed at this point.
# Checkpoint names repeat across runs (multi-stage-2315-12972-1.pth was saved
# by both runs above), so this copy overwrites same-named files on Drive.
! cp *.txt /content/drive/MyDrive/wordle-rl/tests
! cp *.pth /content/drive/MyDrive/wordle-rl/tests

cp: cannot stat '*.txt': No such file or directory
