# New train/test strategy

In [1]:
%load_ext autoreload
%autoreload 2

from functools import partial
from collections import defaultdict
import pickle

from wordle.wordlenp import Wordle
from environment.environment import Environment, StateYesNo, StateVocabulary
from environment.action import ActionVocabulary, ActionLetters, ActionCombLetters
from dqn.agent import Agent
from dqn.train import Trainer
from replay_buffer.cpprb import PrioritizedReplayBuffer, ReplayBuffer

import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style='whitegrid')
import torch
import numpy as np
# Seed every RNG the notebook touches: NumPy drives the vocabulary shuffle,
# torch drives network initialisation, so seeding only NumPy leaves the
# training runs irreproducible.
np.random.seed(0)
torch.manual_seed(0)

# Keep the selected device in a variable instead of discarding the expression,
# then echo it so the cell still displays which device was picked.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
device

'cuda:0'

## Data

In [2]:
import bisect


answers = Wordle._load_vocabulary('wordle/answers.txt', astype=list)
guesses = Wordle._load_vocabulary('wordle/guesses.txt', astype=np.array)
# tolist() already builds an independent Python list, so no defensive copy
# of the array is needed first.
wordle_list = guesses.tolist()

# Positions (within `guesses`) of the words that are also valid answers.
# Set membership gives the same result as a binary search on a sorted list,
# but does not silently break if the answers file is ever not sorted.
answer_set = set(answers)
in_answers = [i for i, word in enumerate(guesses) if word in answer_set]

print(len(answers), len(guesses))

# Random permutation of guess positions; relies on the global NumPy seed set
# in the first cell, so re-running this cell alone yields a different order.
indices = np.arange(len(guesses))
np.random.shuffle(indices)

2315 12972


## Testing buffer saving

### Create

In [20]:
from cpprb import ReplayBuffer as RB, PrioritizedReplayBuffer as PRB


# Toy dimensions for the save/load smoke test.
buffer_size, obs_shape, act_dim = 256, 3, 1

# Plain (non-prioritized) replay buffer. Each transition has five fields;
# only the observation and action fields are given an explicit shape.
rb = RB(
    buffer_size,
    env_dict=dict(
        obs=dict(shape=obs_shape),
        act=dict(shape=act_dim),
        rew=dict(),
        next_obs=dict(shape=obs_shape),
        done=dict(),
    ),
)

### Fill

In [21]:
# A single dummy transition, inserted repeatedly.
obs = np.ones(obs_shape)
next_obs = np.ones(obs_shape)
act = np.ones(act_dim)
rew, done = 0, 0

for step in range(5):
    rb.add(obs=obs, act=act, rew=rew, next_obs=next_obs, done=done)

    # `done` is constant 0 in this demo, so the branch never runs; it is kept
    # to show where ReplayBuffer.on_episode_end() belongs (together with the
    # environment reset) in a real loop.
    if done:
        rb.on_episode_end()

# Display everything stored so far.
rb.get_all_transitions()

{'obs': array([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=float32),
 'act': array([[1.],
        [1.],
        [1.],
        [1.],
        [1.]], dtype=float32),
 'rew': array([[0.],
        [0.],
        [0.],
        [0.],
        [0.]], dtype=float32),
 'next_obs': array([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=float32),
 'done': array([[0.],
        [0.],
        [0.],
        [0.],
        [0.]], dtype=float32)}

### Save

In [22]:
# Write the buffer's stored transitions to disk. The Load cell reads them back
# from 'buffer.npz' (NOTE(review): the '.npz' suffix is presumably appended by
# the saver — confirm against the cpprb docs).
rb.save_transitions('buffer')

### Load

In [23]:
# Rebuild the buffer as a *prioritized* one with the same transition layout,
# then restore the transitions that were saved from the plain ReplayBuffer.
# Note that this rebinds `rb`: the original buffer object is dropped.
rb = PRB(
    buffer_size,
    dict(
        obs=dict(shape=obs_shape),
        act=dict(shape=act_dim),
        rew=dict(),
        next_obs=dict(shape=obs_shape),
        done=dict(),
    ),
    alpha=1,
)
rb.load_transitions('buffer.npz')

# Should display the same five transitions that were saved.
rb.get_all_transitions()

{'obs': array([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=float32),
 'act': array([[1.],
        [1.],
        [1.],
        [1.],
        [1.]], dtype=float32),
 'rew': array([[0.],
        [0.],
        [0.],
        [0.],
        [0.]], dtype=float32),
 'next_obs': array([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=float32),
 'done': array([[0.],
        [0.],
        [0.],
        [0.],
        [0.]], dtype=float32)}

## Train Configuration

In [4]:
# One-hot encoding matrices of the vocabulary for single letters (k=1) and
# letter pairs (k=2); the printed shapes are (n_actions, n_words).
ohe1, ohe2 = (
    ActionCombLetters(vocabulary=wordle_list, k=comb_size).ohe_matrix
    for comb_size in (1, 2)
)
print(ohe1.shape, ohe2.shape)

# Reward values keyed by letter-feedback symbol and by game event.
step_rewards = {
    'B': 0,
    'Y': 1,
    'G': 1,
    'win': 10,
    'lose': -10,
    'step': -5,
}

# Nested result storage: a missing outer key yields a fresh inner dict.
tasks_results = defaultdict(dict)

torch.Size([130, 12972]) torch.Size([4782, 12972])


In [21]:
def agent_path(nickname, t):
    """Return the checkpoint file paths for run `nickname` at step `t`.

    The result maps 'local' and 'target' to '.pth' files and 'buffer' to a
    '.npz' file, all located under the `nickname` directory and tagged
    with `t`.
    """
    paths = {}
    paths['local'] = f'{nickname}/local-{t}.pth'
    paths['target'] = f'{nickname}/target-{t}.pth'
    paths['buffer'] = f'{nickname}/buffer-{t}.npz'
    return paths

In [None]:
def get_data(n_guesses, overfit):
    """Build the train/test split for an experiment.

    Thin wrapper around Trainer.train_test_split that supplies the
    notebook-level `guesses`, `indices` and `in_answers` globals, so callers
    only choose the vocabulary size and the overfit flag.
    """
    split = Trainer.train_test_split(
        n_guesses, overfit, guesses, indices, in_answers
    )
    return split

## Experiments

### Without Overfitting

During training, the secret word is drawn from all words except the test ones.

In [17]:
# Split built from 2000 guess words with overfit disabled; the recorded
# output below reports 1667 train answers and 333 test answers.
n_guesses = 2000
data = get_data(n_guesses, overfit=False)

train answers: 1667
test answers: 333
guesses: 2000


In [22]:
# Baseline run on the non-overfit split. The call returns the run's nickname.
nickname = Trainer.train_comb_letters(
    data=data,
    agent_path=None,

    # action encoding / environment setup
    k=1,
    n_envs=8,
    optimize_interval=8,

    # training length
    n_batches_warm=10,
    n_batches=40000,

    # epsilon schedule
    eps_start=1,
    eps_end=0.01,
    eps_decay=0.95,
)

WARM BATCHES:   0%|          | 0/10 [00:00<?, ?it/s]


Batch    0	Test Win Rate: 0.60%	Test Mean Steps: 2.50


TRAIN BATCHES:   0%|          | 0/40000 [00:00<?, ?it/s]


Batch 5000	Time: 107 s	Agent Eps: 0.01	Train Win Rate: 0.88%	Test Win Rate: 1.50%	Test Mean Steps: 4.20

Batch 10000	Time: 216 s	Agent Eps: 0.01	Train Win Rate: 2.90%	Test Win Rate: 0.60%	Test Mean Steps: 3.50

Batch 15000	Time: 323 s	Agent Eps: 0.01	Train Win Rate: 7.12%	Test Win Rate: 3.00%	Test Mean Steps: 4.00

Batch 20000	Time: 427 s	Agent Eps: 0.01	Train Win Rate: 33.96%	Test Win Rate: 10.51%	Test Mean Steps: 4.51

Batch 25000	Time: 519 s	Agent Eps: 0.01	Train Win Rate: 66.34%	Test Win Rate: 32.13%	Test Mean Steps: 4.47

Batch 30000	Time: 607 s	Agent Eps: 0.01	Train Win Rate: 80.52%	Test Win Rate: 37.84%	Test Mean Steps: 4.45

Batch 35000	Time: 691 s	Agent Eps: 0.01	Train Win Rate: 87.62%	Test Win Rate: 38.74%	Test Mean Steps: 4.63

Batch 40000	Time: 771 s	Agent Eps: 0.01	Train Win Rate: 91.42%	Test Win Rate: 40.24%	Test Mean Steps: 4.60


### With Overfitting

In [None]:
# Same vocabulary size as the no-overfit experiment, but with overfit=True.
# NOTE(review): presumably this lets test words also appear during training —
# confirm against Trainer.train_test_split.
n_guesses = 2000
data = get_data(n_guesses, overfit=True)

In [None]:
# Overfit run. It uses the same entry point and hyperparameters as the
# no-overfit experiment, so the two runs differ only in the data split.
# The previous call went through `Trainer.train` while still passing the
# comb-letters argument `k`; `Trainer.train_comb_letters` is the call that
# is known to accept exactly this set of keyword arguments.
nickname = Trainer.train_comb_letters(
    data=data,

    n_batches=40000,
    n_batches_warm=10,

    eps_start=1,
    eps_end=0.01,
    eps_decay=0.95,

    n_envs=8,
    k=1,
    optimize_interval=8,

    agent_path=None
)

### 130-model as backbone for 4782-model

In [None]:
# TODO: not implemented yet. Intended call (neither `agent` nor `model_path`
# is defined anywhere in this notebook):
# agent.load_backbone(model_path)