# Testing Action with Comb Letters

In [1]:
# Mount Google Drive at /content/drive so files under MyDrive are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Copy the wordle-rl project from Drive into /content; -n keeps files that already exist there.
! cp -a -n /content/drive/MyDrive/wordle-rl/. /content/

In [3]:
! mkdir /content/drive/MyDrive/wordle-rl/tests

mkdir: cannot create directory ‘/content/drive/MyDrive/wordle-rl/tests’: File exists


In [4]:
! pip install cpprb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting cpprb
  Downloading cpprb-10.7.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m27.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: cpprb
Successfully installed cpprb-10.7.1


In [5]:
! pip install annoy

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting annoy
  Downloading annoy-1.17.2.tar.gz (647 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m647.4/647.4 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: annoy
  Building wheel for annoy (setup.py) ... [?25l[?25hdone
  Created wheel for annoy: filename=annoy-1.17.2-cp39-cp39-linux_x86_64.whl size=582217 sha256=96a8cbc43a950b2197a64ac7a8f16c8f2c906dad96c5c6a79efd65ce2e06d9a0
  Stored in directory: /root/.cache/pip/wheels/f2/2e/e4/f3ae385c375b87982a2a70055061d4a6330ef4f60817e717e3
Successfully built annoy
Installing collected packages: annoy
Successfully installed annoy-1.17.2


In [8]:
%load_ext autoreload
%autoreload 2

from functools import partial
from collections import defaultdict
import pickle

from wordle.wordlenp import Wordle
from environment.environment import Environment, StateYesNo, StateVocabulary
from environment.action import ActionVocabulary, ActionLetters, ActionCombLetters
from dqn.agent import Agent
from dqn.train import Trainer
from replay_buffer.cpprb import PrioritizedReplayBuffer, ReplayBuffer

import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style='whitegrid')
import torch
import numpy as np
# Seed NumPy's global RNG; it drives the shuffle of guess indices further down.
# NOTE(review): torch is not seeded in this notebook, so training may still vary
# between runs unless the agent seeds it internally — confirm.
np.random.seed(0)

# Shown as the cell output only: which device string is available (the value is not stored).
"cuda:0" if torch.cuda.is_available() else "cpu"

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


'cpu'

## Datasets

### Multi-stage data

In [42]:
import bisect


# Answer words as a list, allowed guesses as a NumPy array, plus a plain-list
# copy of the guesses that is handed to the action class later on.
answers = Wordle._load_vocabulary('wordle/answers.txt', astype=list)
guesses = Wordle._load_vocabulary('wordle/guesses.txt', astype=np.array)
wordle_list = guesses.copy().tolist()

# Positions in `guesses` whose word is also an answer.
# The binary search assumes `answers` is sorted — TODO confirm against the loader.
in_answers = []
for guess_idx, guess_word in enumerate(guesses):
    pos = bisect.bisect_left(answers, guess_word)
    if pos >= len(answers):
        # Insertion point is past the end: the word cannot be in `answers`.
        continue
    if answers[pos] == guess_word:
        in_answers.append(guess_idx)

  and should_run_async(code)


In [16]:
# Sanity check: (number of answer words, number of allowed guesses).
len(answers), len(guesses)

(2315, 12972)

In [17]:
# Random order over all guess indices (in-place shuffle using NumPy's global RNG).
# Prefixes of this array select the reduced vocabularies used by the staged runs below.
guesses_indices = np.arange(len(guesses))
np.random.shuffle(guesses_indices)

## Plotting Utility

In [18]:
def plot_results(tasks_results, figname):
    """Plot train/test win-rate curves, one subplot per task, and save them as SVG.

    Args:
        tasks_results: mapping ``task_name -> {method_name: res}`` where each
            ``res`` unpacks into ``(train_timers, train_win_rates,
            test_timers, test_win_rates)``.
        figname: output path without extension; ``'.svg'`` is appended.

    Raises:
        ValueError: if ``tasks_results`` is empty (there is nothing to plot).
    """
    n_tasks = len(tasks_results)
    if n_tasks == 0:
        raise ValueError('tasks_results is empty: nothing to plot')

    # squeeze=False always yields a 2-D array of Axes. Without it a single task
    # produces a bare Axes object and indexing with `ax[i]` fails.
    _, axes = plt.subplots(1, n_tasks, figsize=(4*n_tasks, 5), squeeze=False)
    ax = axes[0]

    # White is omitted: it is invisible on the white background set via seaborn.
    color = ['b','g','r','c','m','y','k']

    # over tasks
    for i, (task_name, results) in enumerate(tasks_results.items()):
        # over methods
        for c, (method_name, res) in enumerate(results.items()):
            train_timers, train_win_rates, test_timers, test_win_rates = res
            # Cycle through the palette so more methods than colours does not raise IndexError.
            line_color = color[c % len(color)]
            # Train curve is drawn faintly, test curve at full opacity in the same colour.
            ax[i].plot(train_timers, train_win_rates, label=method_name+' (train)', c=line_color, alpha=0.2)
            ax[i].plot(test_timers, test_win_rates, label=method_name+' (test)', c=line_color)
        ax[i].set_xlabel('time, s')
        # Win rate is not measured in seconds, so the label carries no "s" unit.
        ax[i].set_ylabel('win rate')
        ax[i].legend()
        ax[i].set_title(task_name)
    plt.savefig(figname + '.svg', bbox_inches='tight')
    plt.show()

## Let's Go

In [19]:
# Build the `ohe_matrix` for k=1 and k=2 once over the full guess list;
# experiment() passes one of them to ActionCombLetters (presumably to avoid rebuilding it per run).
ohe1 = ActionCombLetters(vocabulary=wordle_list, k=1).ohe_matrix
ohe2 = ActionCombLetters(vocabulary=wordle_list, k=2).ohe_matrix
print(ohe1.shape, ohe2.shape)
# Reward table passed to every Environment created in experiment().
step_rewards = {'B':0, 'Y':1, 'G':1, 'win':10, 'lose':-10, 'step':-5}
# tasks_results[problem_name][method_name] holds the value returned by Trainer.train.
tasks_results = defaultdict(dict)

torch.Size([130, 12972]) torch.Size([4782, 12972])


In [40]:
def experiment(
        answers, guesses, play_batch_size,
        optimize_interval, n_batches, n_batches_warm, k=1,
        eps_start=1, eps_end=0.05, eps_decay=0.9995, model_path=None
):
    """Run one training stage and return the result of ``agent.dump_models``.

    Reads the notebook globals ``step_rewards``, ``ohe1``, ``ohe2`` and
    ``wordle_list`` and stores the training result in
    ``tasks_results[problem_name]['multi-stage']``.

    Args:
        answers: answer words for this stage.
        guesses: allowed guess words for this stage (the action vocabulary).
        play_batch_size: number of environments created and played together.
        optimize_interval: forwarded to ``Agent``.
        n_batches: forwarded to ``Trainer``.
        n_batches_warm: forwarded to ``Trainer``.
        k: forwarded to ``ActionCombLetters``; ``ohe1`` is used when ``k == 1``,
            ``ohe2`` for every other value.
        eps_start, eps_end, eps_decay: forwarded to ``Trainer.train``.
        model_path: forwarded to ``Agent`` (the staged runs pass the value
            returned by the previous call; ``None`` presumably means no
            checkpoint is loaded — confirm in ``Agent``).

    Returns:
        Whatever ``agent.dump_models(nickname=problem_name)`` returns; callers
        in this notebook feed it back in as ``model_path``.

    Raises:
        ValueError: if ``play_batch_size`` is smaller than 1.
    """
    if play_batch_size < 1:
        # Without at least one environment there is no state size to read below.
        raise ValueError('play_batch_size must be at least 1')

    env_list = [
        Environment(
            rewards=step_rewards,
            wordle=Wordle(vocabulary=guesses, answers=answers),
            state_instance=StateYesNo()
        )
        for _ in range(play_batch_size)
    ]
    # All environments are built identically; read the state size from one of them
    # rather than relying on a leaked loop variable.
    state_size = env_list[-1].state.size

    agent = Agent(
        state_size=state_size,
        action_instance=ActionCombLetters(
            k=k, vocabulary=guesses,
            ohe_matrix=ohe1 if k == 1 else ohe2,
            wordle_list=wordle_list
        ),
        replay_buffer=PrioritizedReplayBuffer(state_size=state_size, alpha=1),
        optimize_interval=optimize_interval,
        model_path=model_path
    )

    trainer = Trainer(
        env_list, agent,
        play_batch_size=play_batch_size,
        n_batches=n_batches,
        n_batches_warm=n_batches_warm
    )

    problem_name = f'{len(answers)}-{len(guesses)}'
    method_name = 'multi-stage'

    res = trainer.train(
        eps_start=eps_start,
        eps_end=eps_end,
        eps_decay=eps_decay,
        # Must be an f-string: the plain literal produced files named
        # "{method_name}-{problem_name}-1.pth".
        nickname=f'{method_name}-{problem_name}'
    )
    tasks_results[problem_name][method_name] = res

    return agent.dump_models(nickname=problem_name)

In [23]:
n_guesses = 4000
# The first n_guesses shuffled indices define this stage's guess vocabulary.
subset_indices = guesses_indices[:n_guesses]
guesses_cur = guesses[subset_indices]
# Set membership is O(1) per index; testing against the `in_answers` list scanned it every time.
answer_index_set = set(in_answers)
# Answers for this stage: the selected guesses that are also answer words (subset order kept).
answers_cur = guesses[[i_guess for i_guess in subset_indices if i_guess in answer_index_set]]
print(len(answers_cur))

740


In [26]:
# Stage 1: train on the 4000-guess subset with no input checkpoint (model_path=None),
# 10 warm batches, and epsilon going from 1 down to a floor of 0.01.
# The returned value is passed as model_path to the next stage.
model_path = experiment(
    answers_cur, guesses_cur,
    play_batch_size=8, optimize_interval=8,
    n_batches=40000, n_batches_warm=10,
    eps_start=1,
    eps_end=0.01,
    model_path=None
)

WARM BATCHES:   0%|          | 0/10 [00:00<?, ?it/s]

TRAIN BATCHES:   0%|          | 0/40000 [00:00<?, ?it/s]


Batch 5000	Time: 549 s	Agent Eps: 0.08	Train Win Rate: 1.54%	Test Win Rate: 2.30%	Test Mean Steps: 4.53

Batch 10000	Time: 1124 s	Agent Eps: 0.01	Train Win Rate: 7.12%	Test Win Rate: 6.76%	Test Mean Steps: 4.08

Batch 15000	Time: 1691 s	Agent Eps: 0.01	Train Win Rate: 26.32%	Test Win Rate: 22.84%	Test Mean Steps: 4.34

Batch 20000	Time: 2258 s	Agent Eps: 0.01	Train Win Rate: 51.40%	Test Win Rate: 52.84%	Test Mean Steps: 3.99

Batch 25000	Time: 2798 s	Agent Eps: 0.01	Train Win Rate: 74.52%	Test Win Rate: 74.32%	Test Mean Steps: 3.88

Batch 30000	Time: 3279 s	Agent Eps: 0.01	Train Win Rate: 88.60%	Test Win Rate: 92.16%	Test Mean Steps: 3.64

Batch 35000	Time: 3732 s	Agent Eps: 0.01	Train Win Rate: 93.48%	Test Win Rate: 94.19%	Test Mean Steps: 3.55

Batch 40000	Time: 4172 s	Agent Eps: 0.01	Train Win Rate: 95.92%	Test Win Rate: 97.03%	Test Mean Steps: 3.53

Saving checkpoint... Saved to multi-stage-740-4000-1.pth


In [27]:
! cp *.txt /content/drive/MyDrive/wordle-rl/tests
! cp *.pth /content/drive/MyDrive/wordle-rl/tests
! cp *.pickle /content/drive/MyDrive/wordle-rl/tests

cp: cannot stat '*.pickle': No such file or directory


In [28]:
n_guesses = 8000
# The first n_guesses shuffled indices define this stage's guess vocabulary.
subset_indices = guesses_indices[:n_guesses]
guesses_cur = guesses[subset_indices]
# Set membership is O(1) per index; testing against the `in_answers` list scanned it every time.
answer_index_set = set(in_answers)
# Answers for this stage: the selected guesses that are also answer words (subset order kept).
answers_cur = guesses[[i_guess for i_guess in subset_indices if i_guess in answer_index_set]]
print(len(answers_cur))

1439


In [29]:
# Stage 2: continue on the 8000-guess subset from the value returned by the previous stage.
# No warm batches; eps_start equals eps_end (0.01).
model_path = experiment(
    answers_cur, guesses_cur,
    play_batch_size=8, optimize_interval=8,
    n_batches=40000, n_batches_warm=0,
    eps_start=0.01,
    eps_end=0.01,
    model_path=model_path
)

  and should_run_async(code)


WARM BATCHES: 0it [00:00, ?it/s]

TRAIN BATCHES:   0%|          | 0/40000 [00:00<?, ?it/s]


Batch 5000	Time: 549 s	Agent Eps: 0.01	Train Win Rate: 76.40%	Test Win Rate: 78.39%	Test Mean Steps: 3.90

Batch 10000	Time: 1071 s	Agent Eps: 0.01	Train Win Rate: 85.44%	Test Win Rate: 87.35%	Test Mean Steps: 3.80

Batch 15000	Time: 1585 s	Agent Eps: 0.01	Train Win Rate: 88.44%	Test Win Rate: 90.90%	Test Mean Steps: 3.80

Batch 20000	Time: 2084 s	Agent Eps: 0.01	Train Win Rate: 90.92%	Test Win Rate: 93.12%	Test Mean Steps: 3.75

Batch 25000	Time: 2581 s	Agent Eps: 0.01	Train Win Rate: 92.64%	Test Win Rate: 94.72%	Test Mean Steps: 3.77

Batch 30000	Time: 3076 s	Agent Eps: 0.01	Train Win Rate: 93.64%	Test Win Rate: 95.07%	Test Mean Steps: 3.75

Batch 35000	Time: 3571 s	Agent Eps: 0.01	Train Win Rate: 94.22%	Test Win Rate: 96.73%	Test Mean Steps: 3.71

Batch 40000	Time: 4061 s	Agent Eps: 0.01	Train Win Rate: 95.86%	Test Win Rate: 97.22%	Test Mean Steps: 3.69

Saving checkpoint... Saved to multi-stage-1439-8000-1.pth


In [30]:
! cp *.txt /content/drive/MyDrive/wordle-rl/tests
! cp *.pth /content/drive/MyDrive/wordle-rl/tests
! cp *.pickle /content/drive/MyDrive/wordle-rl/tests

cp: cannot stat '*.pickle': No such file or directory


In [31]:
# Sizes of the full vocabularies used by the next stage: (answers, guesses).
len(answers), len(guesses)

  and should_run_async(code)


(2315, 12972)

In [32]:
# Stage 3: continue on the full answer and guess vocabularies from the previous stage's result.
# No warm batches; eps_start equals eps_end (0.01).
model_path = experiment(
    answers, guesses,
    play_batch_size=8, optimize_interval=8,
    n_batches=50000, n_batches_warm=0,
    eps_start=0.01,
    eps_end=0.01,
    model_path=model_path
)

  and should_run_async(code)


WARM BATCHES: 0it [00:00, ?it/s]

TRAIN BATCHES:   0%|          | 0/50000 [00:00<?, ?it/s]


Batch 6250	Time: 989 s	Agent Eps: 0.01	Train Win Rate: 74.18%	Test Win Rate: 74.47%	Test Mean Steps: 3.99

Batch 12500	Time: 1955 s	Agent Eps: 0.01	Train Win Rate: 81.95%	Test Win Rate: 82.72%	Test Mean Steps: 3.95

Batch 18750	Time: 2884 s	Agent Eps: 0.01	Train Win Rate: 85.86%	Test Win Rate: 87.13%	Test Mean Steps: 3.95

Batch 25000	Time: 3812 s	Agent Eps: 0.01	Train Win Rate: 86.48%	Test Win Rate: 88.55%	Test Mean Steps: 3.95

Batch 31250	Time: 4728 s	Agent Eps: 0.01	Train Win Rate: 88.94%	Test Win Rate: 91.66%	Test Mean Steps: 3.93

Batch 37500	Time: 5624 s	Agent Eps: 0.01	Train Win Rate: 90.40%	Test Win Rate: 92.31%	Test Mean Steps: 3.93

Batch 43750	Time: 6514 s	Agent Eps: 0.01	Train Win Rate: 90.43%	Test Win Rate: 93.00%	Test Mean Steps: 3.91

Batch 50000	Time: 7406 s	Agent Eps: 0.01	Train Win Rate: 90.42%	Test Win Rate: 93.74%	Test Mean Steps: 3.92

Saving checkpoint... Saved to multi-stage-2315-12972-1.pth


In [45]:
! cp *.txt /content/drive/MyDrive/wordle-rl/tests
! cp *.pth /content/drive/MyDrive/wordle-rl/tests
! cp *.pickle /content/drive/MyDrive/wordle-rl/tests

cp: cannot stat '*.pickle': No such file or directory


In [44]:
# Stage 4: further training on the full vocabularies with 64 warm batches and
# epsilon going from 0.01 towards 0.001.
# NOTE(review): `model_path` is whatever the previously executed experiment() call returned;
# the execution counts in this notebook are not sequential, so verify the intended
# checkpoint is used when re-running from a fresh kernel.
model_path = experiment(
    answers, guesses,
    play_batch_size=8, optimize_interval=8,
    n_batches=30000, n_batches_warm=64,
    eps_start=0.01,
    eps_end=0.001,
    model_path=model_path
)

WARM BATCHES:   0%|          | 0/64 [00:00<?, ?it/s]

TRAIN BATCHES:   0%|          | 0/30000 [00:00<?, ?it/s]


Batch 3750	Time: 517 s	Agent Eps: 0.00	Train Win Rate: 93.81%	Test Win Rate: 94.73%	Test Mean Steps: 3.90

Batch 7500	Time: 1034 s	Agent Eps: 0.00	Train Win Rate: 95.57%	Test Win Rate: 95.59%	Test Mean Steps: 3.87

Batch 11250	Time: 1552 s	Agent Eps: 0.00	Train Win Rate: 95.31%	Test Win Rate: 96.80%	Test Mean Steps: 3.84

Batch 15000	Time: 2069 s	Agent Eps: 0.00	Train Win Rate: 96.05%	Test Win Rate: 96.50%	Test Mean Steps: 3.85

Batch 18750	Time: 2586 s	Agent Eps: 0.00	Train Win Rate: 95.89%	Test Win Rate: 96.89%	Test Mean Steps: 3.84

Batch 22500	Time: 3104 s	Agent Eps: 0.00	Train Win Rate: 96.72%	Test Win Rate: 96.80%	Test Mean Steps: 3.82

Batch 26250	Time: 3619 s	Agent Eps: 0.00	Train Win Rate: 96.59%	Test Win Rate: 96.80%	Test Mean Steps: 3.82

Batch 30000	Time: 4136 s	Agent Eps: 0.00	Train Win Rate: 97.15%	Test Win Rate: 97.62%	Test Mean Steps: 3.83

Saving checkpoint... Saved to {method_name}-{problem_name}-1.pth
