# Testbed

In [1]:
%load_ext autoreload
%autoreload 2

from functools import partial

from wordle.wordlenp import Wordle
from environment.environment import Environment, StateYesNo, StateVocabulary, ActionVocabulary
from dqn.agent import Agent
from dqn.train import train, test

import torch
import numpy as np
np.random.seed(0)

## Датасеты

In [2]:
# Full pool of candidate words, read from 'wordle/guesses.txt' through Wordle's
# private loader. make_data() below samples every dataset from this pool.
# NOTE(review): astype=np.array presumably makes the loader return a NumPy array — confirm.
word_list = Wordle._load_vocabulary('wordle/guesses.txt', astype=np.array)

def make_data(n_answers, n_guesses):
    """Sample a guess vocabulary and an answer list nested inside it.

    First draws `n_guesses` distinct words from the notebook-level `word_list`,
    then draws `n_answers` distinct words from that sample, so every answer is
    also one of the guesses. Both draws use NumPy's global random state.

    Args:
        n_answers: number of answer words to draw from the sampled guesses.
        n_guesses: number of guess words to draw from `word_list`.

    Returns:
        tuple: `(answers, guesses)`, in that order.
    """
    guess_pool = np.random.choice(a=word_list, size=n_guesses, replace=False)
    answer_pool = np.random.choice(a=guess_pool, size=n_answers, replace=False)
    return answer_pool, guess_pool

### 10 answers, 100 guesses

In [3]:
# 10 answers sampled out of a 100-word guess vocabulary.
answers_10_100, guesses_10_100 = make_data(n_answers=10, n_guesses=100)
print(answers_10_100)

['abmho' 'cites' 'aware' 'blays' 'acker' 'rawin' 'anile' 'eorls' 'feers'
 'sadza']


### 100 answers, 100 guesses

In [4]:
# Every one of the 100 sampled guesses is also an answer.
answers_100_100, guesses_100_100 = make_data(n_answers=100, n_guesses=100)
print(answers_100_100)

['lilac' 'orles' 'twirl' 'unled' 'sings' 'grind' 'sheaf' 'benny' 'slews'
 'karst' 'rimus' 'lossy' 'joker' 'leash' 'scopa' 'viols' 'giron' 'raiks'
 'lummy' 'renig' 'tinds' 'infos' 'logon' 'drill' 'gudes' 'ammon' 'bhoot'
 'hurry' 'noils' 'coven' 'beryl' 'margs' 'sorbo' 'momes' 'scald' 'potch'
 'flows' 'torus' 'prill' 'scuts' 'brith' 'tamin' 'sewar' 'joram' 'aldol'
 'hazel' 'texes' 'sibbs' 'truth' 'spoil' 'hames' 'actin' 'maces' 'rayas'
 'thuya' 'sugan' 'felly' 'newsy' 'bolos' 'mimeo' 'chems' 'dicty' 'liefs'
 'scuff' 'burps' 'abyes' 'zones' 'cuspy' 'kerve' 'haith' 'amino' 'zygal'
 'kokum' 'zambo' 'icier' 'piers' 'sambo' 'laden' 'barge' 'solei' 'mauts'
 'groat' 'pearl' 'curse' 'jujus' 'troop' 'bilge' 'sibyl' 'gassy' 'elain'
 'daube' 'feyly' 'duals' 'hoper' 'hains' 'beige' 'poove' 'miffy' 'lesbo'
 'dawds']


### 50 answers, 200 guesses

In [5]:
# 50 answers sampled out of a 200-word guess vocabulary.
answers_50_200, guesses_50_200 = make_data(n_answers=50, n_guesses=200)
print(answers_50_200)

['toran' 'burka' 'umpie' 'brock' 'civic' 'beige' 'hoiks' 'biffo' 'nagas'
 'sheol' 'malls' 'matzo' 'peeve' 'deshi' 'mooli' 'scaud' 'ameba' 'wadds'
 'bayts' 'glees' 'kaput' 'bitos' 'comae' 'dosed' 'rabis' 'neats' 'tutti'
 'stays' 'smoky' 'chase' 'resaw' 'simas' 'sowne' 'rorid' 'rebec' 'deawy'
 'hinny' 'sores' 'cerge' 'yogas' 'fouet' 'wheel' 'sowfs' 'talus' 'yabas'
 'topee' 'sabin' 'unbox' 'dyers' 'qophs']


### 200 answers, 200 guesses

In [6]:
# Every one of the 200 sampled guesses is also an answer.
answers_200_200, guesses_200_200 = make_data(n_answers=200, n_guesses=200)
print(answers_200_200)

['unled' 'shirk' 'horks' 'veena' 'winna' 'amate' 'scape' 'sunup' 'dukka'
 'alway' 'typey' 'sated' 'sepic' 'vagal' 'dangs' 'bitts' 'giust' 'amice'
 'roper' 'molys' 'moira' 'rosed' 'salut' 'cling' 'moles' 'azuki' 'chelp'
 'lurve' 'mavin' 'scath' 'empty' 'stele' 'deoxy' 'zuzim' 'roupy' 'briar'
 'anode' 'chest' 'moola' 'south' 'grimy' 'chats' 'joint' 'teuch' 'ticca'
 'drouk' 'shops' 'fleer' 'olent' 'mucus' 'tryps' 'tices' 'broil' 'thema'
 'rangy' 'ceric' 'jelly' 'ohmic' 'repay' 'drops' 'wills' 'aulos' 'hoied'
 'trefa' 'strut' 'nutsy' 'twoer' 'anomy' 'adhan' 'ingot' 'hooey' 'sluts'
 'sicks' 'cotes' 'bodge' 'weber' 'agism' 'texas' 'tommy' 'plumy' 'amido'
 'cobbs' 'mamma' 'repot' 'whist' 'bloat' 'welts' 'flexo' 'loves' 'hussy'
 'sower' 'noahs' 'roses' 'wents' 'chase' 'manul' 'muons' 'silds' 'yacka'
 'okapi' 'maqui' 'mbira' 'ponga' 'romal' 'schul' 'sonny' 'coram' 'hived'
 'lupus' 'segol' 'kybos' 'birch' 'leeze' 'golly' 'yokes' 'pulse' 'onely'
 'curve' 'bonds' 'cosey' 'lemon' 'cruds' 'bachs' 't

### 100 answers, 2000 guesses

In [7]:
# 100 answers sampled out of a 2000-word guess vocabulary.
answers_100_2000, guesses_100_2000 = make_data(n_answers=100, n_guesses=2000)
print(answers_100_2000)

['layed' 'blady' 'welks' 'seame' 'ahing' 'stong' 'basks' 'oracy' 'trone'
 'coved' 'agita' 'trust' 'kayak' 'giros' 'domed' 'yamen' 'delay' 'navvy'
 'carby' 'tizes' 'hoses' 'daffs' 'diver' 'notch' 'slink' 'namma' 'drown'
 'carks' 'pepsi' 'hoors' 'erect' 'benis' 'panty' 'cowps' 'amble' 'bitte'
 'braid' 'murrs' 'coomb' 'stylo' 'imine' 'gatch' 'claes' 'smirr' 'cotts'
 'fiats' 'paths' 'zoeae' 'cuing' 'dauts' 'boink' 'scops' 'staps' 'sanes'
 'saute' 'gauss' 'shily' 'icing' 'pleat' 'signa' 'araba' 'flirt' 'begot'
 'flier' 'ahent' 'mvule' 'kiack' 'glair' 'harim' 'sibyl' 'sudds' 'fines'
 'carps' 'atlas' 'cloye' 'seams' 'wawas' 'repin' 'vives' 'linch' 'unwit'
 'coapt' 'derth' 'deoxy' 'tases' 'pulmo' 'rhyne' 'unify' 'taish' 'gucks'
 'hovel' 'culms' 'ennog' 'oonts' 'honor' 'afald' 'plain' 'fusee' 'utter'
 'whins']


## Rewards

In [8]:
# Reward tables handed to Environment(rewards=...). Every table has the same six keys.
# NOTE(review): 'B'/'Y'/'G' presumably stand for Wordle's black/yellow/green letter
# feedback and 'step' for a per-guess term — confirm in environment.environment.
soft_rewards = dict(B=1, Y=2, G=3, win=20, lose=-10, step=-4)
hard_rewards = dict(B=0, Y=0, G=0, win=10, lose=-10, step=-2)
step_rewards = dict(B=0, Y=1, G=1, win=10, lose=-10, step=-5)
char_rewards = dict(B=0, Y=1, G=1, win=10, lose=-10, step=-2)

In [27]:
def experiment(rewards, answers, guesses, n_episodes, logging_level=None):
    """Train and then test a fresh agent under the given reward table.

    Args:
        rewards: reward table passed to `Environment` (the variable under study here).
        answers: passed to `Wordle` as `answers`.
        guesses: passed to `Wordle` as `vocabulary`; its length is the agent's
            `action_size`, and it is bound into the action constructor.
        n_episodes: number of training episodes given to `train`.
        logging_level: forwarded to `train` unchanged.

    Returns:
        None. Results are whatever `train` and `test` report themselves.
    """
    game = Wordle(vocabulary=guesses, answers=answers)
    observation = StateYesNo()
    env = Environment(rewards=rewards, wordle=game, state_instance=observation)

    state_size = env.state.size
    agent = Agent(
        state_size=state_size,
        action_size=len(guesses),
        action_constructor=partial(ActionVocabulary, vocabulary=guesses),
    )

    # The same environment/agent pair is trained and then evaluated.
    pair = dict(env=env, agent=agent)
    train(n_episodes=n_episodes, logging_level=logging_level, **pair)
    test(**pair)

### Soft

In [28]:
# soft_rewards on 10 answers / 100 guesses, 5000 training episodes.
experiment(rewards=soft_rewards, answers=answers_10_100, guesses=guesses_10_100, n_episodes=5000)


Episode  500	Score: 12.88	Success Rate: 69.0%	Duration: 23.5 s

Episode 1000	Score: 15.33	Success Rate: 75.0%	Duration: 41.8 s

Episode 1500	Score: 19.52	Success Rate: 86.0%	Duration: 60.2 s

Episode 2000	Score: 23.09	Success Rate: 94.0%	Duration: 78.7 s

Episode 2500	Score: 21.20	Success Rate: 89.0%	Duration: 98.0 s

Episode 3000	Score: 22.01	Success Rate: 90.0%	Duration: 120.1 s

Episode 3500	Score: 22.53	Success Rate: 91.0%	Duration: 140.6 s

Episode 4000	Score: 24.83	Success Rate: 97.0%	Duration: 163.0 s

Episode 4500	Score: 24.18	Success Rate: 95.0%	Duration: 185.9 s

Episode 5000	Score: 26.14	Success Rate: 99.0%	Duration: 214.2 s
Success: 9 / 10 (90.0000%)	Steps: 4.8889	Time: 0.1 s	Saved to: test18-03-2023-08:54:38.txt



In [29]:
# soft_rewards on 100 answers / 100 guesses, 20000 training episodes.
experiment(rewards=soft_rewards, answers=answers_100_100, guesses=guesses_100_100, n_episodes=20000)


Episode 2000	Score: -1.92	Success Rate: 18.0%	Duration: 119.6 s

Episode 4000	Score: 3.61	Success Rate: 33.0%	Duration: 239.6 s

Episode 6000	Score: 9.42	Success Rate: 50.0%	Duration: 357.7 s

Episode 8000	Score: 10.80	Success Rate: 53.0%	Duration: 471.9 s

Episode 10000	Score: 13.80	Success Rate: 64.0%	Duration: 584.8 s

Episode 12000	Score: 15.84	Success Rate: 69.0%	Duration: 698.1 s

Episode 14000	Score: 18.10	Success Rate: 76.0%	Duration: 812.4 s

Episode 16000	Score: 17.27	Success Rate: 75.0%	Duration: 927.9 s

Episode 18000	Score: 17.45	Success Rate: 75.0%	Duration: 1045.6 s

Episode 20000	Score: 16.96	Success Rate: 73.0%	Duration: 1169.8 s
Success: 78 / 100 (78.0000%)	Steps: 4.2564	Time: 0.6 s	Saved to: test18-03-2023-09:14:08.txt



### Hard

In [30]:
# hard_rewards on 10 answers / 100 guesses, 5000 training episodes.
experiment(rewards=hard_rewards, answers=answers_10_100, guesses=guesses_10_100, n_episodes=5000)


Episode  500	Score: -6.78	Success Rate: 47.0%	Duration: 20.7 s

Episode 1000	Score: 2.46	Success Rate: 80.0%	Duration: 38.2 s

Episode 1500	Score: 7.16	Success Rate: 98.0%	Duration: 54.0 s

Episode 2000	Score: 7.96	Success Rate: 100.0%	Duration: 68.7 s

Episode 2500	Score: 7.98	Success Rate: 100.0%	Duration: 98.5 s

Episode 3000	Score: 7.46	Success Rate: 100.0%	Duration: 120.3 s

Episode 3500	Score: 5.24	Success Rate: 91.0%	Duration: 132.0 s

Episode 4000	Score: 8.28	Success Rate: 100.0%	Duration: 142.9 s

Episode 4500	Score: 5.28	Success Rate: 94.0%	Duration: 155.8 s

Episode 5000	Score: 5.98	Success Rate: 94.0%	Duration: 169.3 s
Success: 10 / 10 (100.0000%)	Steps: 1.9000	Time: 0.0 s	Saved to: test18-03-2023-09:16:59.txt



In [31]:
# hard_rewards on 100 answers / 100 guesses, 20000 training episodes.
experiment(rewards=hard_rewards, answers=answers_100_100, guesses=guesses_100_100, n_episodes=20000)


Episode 2000	Score: -15.00	Success Rate: 19.0%	Duration: 118.7 s

Episode 4000	Score: -15.92	Success Rate: 16.0%	Duration: 260.5 s

Episode 6000	Score: -12.56	Success Rate: 30.0%	Duration: 394.1 s

Episode 8000	Score: -6.36	Success Rate: 53.0%	Duration: 515.8 s

Episode 10000	Score: -8.86	Success Rate: 44.0%	Duration: 655.3 s

Episode 12000	Score: -5.88	Success Rate: 54.0%	Duration: 770.8 s

Episode 14000	Score: -5.38	Success Rate: 58.0%	Duration: 885.3 s

Episode 16000	Score: -7.42	Success Rate: 51.0%	Duration: 1027.4 s

Episode 18000	Score: -2.50	Success Rate: 70.0%	Duration: 1142.6 s

Episode 20000	Score: -2.66	Success Rate: 71.0%	Duration: 1247.2 s
Success: 68 / 100 (68.0000%)	Steps: 3.8088	Time: 0.5 s	Saved to: test18-03-2023-09:37:47.txt



### Step

In [35]:
# step_rewards on 10 answers / 100 guesses, 5000 training episodes.
experiment(rewards=step_rewards, answers=answers_10_100, guesses=guesses_10_100, n_episodes=5000)


Episode  500	Score: -7.31	Success Rate: 65.0%	RMSE: 3.944	Time: 20.1 s

Episode 1000	Score: 0.12	Success Rate: 84.0%	RMSE: 3.605	Time: 36.7 s

Episode 1500	Score: 1.31	Success Rate: 84.0%	RMSE: 3.811	Time: 52.3 s

Episode 2000	Score: 6.46	Success Rate: 100.0%	RMSE: 4.146	Time: 67.2 s

Episode 2500	Score: 7.21	Success Rate: 100.0%	RMSE: 3.283	Time: 81.9 s

Episode 3000	Score: 6.73	Success Rate: 100.0%	RMSE: 2.802	Time: 96.4 s

Episode 3500	Score: 5.61	Success Rate: 97.0%	RMSE: 2.467	Time: 109.9 s

Episode 4000	Score: 4.98	Success Rate: 96.0%	RMSE: 2.535	Time: 123.0 s

Episode 4500	Score: 5.95	Success Rate: 98.0%	RMSE: 2.622	Time: 134.0 s

Episode 5000	Score: 6.55	Success Rate: 99.0%	RMSE: 2.467	Time: 144.6 s

Success: 10 / 10 (100.0000%)	Steps: 2.1000	Time: 0.0 s	Saved to: test18-03-2023-10:21:38.txt



In [32]:
# step_rewards on 100 answers / 100 guesses, 20000 training episodes.
experiment(rewards=step_rewards, answers=answers_100_100, guesses=guesses_100_100, n_episodes=20000)


Episode 2000	Score: -26.58	Success Rate: 15.0%	Duration: 116.9 s

Episode 4000	Score: -26.07	Success Rate: 15.0%	Duration: 233.6 s

Episode 6000	Score: -17.40	Success Rate: 39.0%	Duration: 347.3 s

Episode 8000	Score: -17.50	Success Rate: 39.0%	Duration: 462.2 s

Episode 10000	Score: -10.93	Success Rate: 61.0%	Duration: 566.8 s

Episode 12000	Score: -15.23	Success Rate: 45.0%	Duration: 657.9 s

Episode 14000	Score: -7.27	Success Rate: 68.0%	Duration: 757.3 s

Episode 16000	Score: -4.74	Success Rate: 75.0%	Duration: 850.6 s

Episode 18000	Score: -2.39	Success Rate: 80.0%	Duration: 944.3 s

Episode 20000	Score: -1.87	Success Rate: 84.0%	Duration: 1036.3 s
Success: 83 / 100 (83.0000%)	Steps: 2.7590	Time: 0.4 s	Saved to: test18-03-2023-09:55:05.txt



### Char

In [34]:
# char_rewards on 10 answers / 100 guesses, 5000 training episodes.
experiment(rewards=char_rewards, answers=answers_10_100, guesses=guesses_10_100, n_episodes=5000)


Episode  500	Score: 1.18	Success Rate: 68.0%	RMSE: 3.155012607574463	Time: 32.2 s

Episode 1000	Score: 3.36	Success Rate: 76.0%	RMSE: 3.106156349182129	Time: 54.7 s

Episode 1500	Score: 1.26	Success Rate: 67.0%	RMSE: 3.114962100982666	Time: 73.9 s

Episode 2000	Score: 4.18	Success Rate: 77.0%	RMSE: 3.101381778717041	Time: 95.0 s

Episode 2500	Score: 5.56	Success Rate: 86.0%	RMSE: 3.2494874000549316	Time: 115.8 s

Episode 3000	Score: 5.97	Success Rate: 89.0%	RMSE: 2.5868852138519287	Time: 136.1 s

Episode 3500	Score: 8.27	Success Rate: 98.0%	RMSE: 3.5016274452209473	Time: 157.2 s

Episode 4000	Score: 8.09	Success Rate: 96.0%	RMSE: 3.2956247329711914	Time: 175.3 s

Episode 4500	Score: 8.21	Success Rate: 97.0%	RMSE: 4.088204383850098	Time: 192.7 s

Episode 5000	Score: 8.78	Success Rate: 97.0%	RMSE: 3.91351318359375	Time: 210.2 s

Success: 10 / 10 (100.0000%)	Steps: 3.1000	Time: 0.0 s	Saved to: test18-03-2023-10:19:12.txt



In [33]:
# char_rewards on 100 answers / 100 guesses, 20000 training episodes.
experiment(rewards=char_rewards, answers=answers_100_100, guesses=guesses_100_100, n_episodes=20000)


Episode 2000	Score: -10.95	Success Rate: 19.0%	Duration: 120.0 s

Episode 4000	Score: -8.20	Success Rate: 32.0%	Duration: 243.4 s

Episode 6000	Score: -5.49	Success Rate: 43.0%	Duration: 361.8 s

Episode 8000	Score: -2.86	Success Rate: 53.0%	Duration: 475.3 s

Episode 10000	Score: -1.39	Success Rate: 60.0%	Duration: 597.6 s

Episode 12000	Score: -0.15	Success Rate: 64.0%	Duration: 711.2 s

Episode 14000	Score: 0.90	Success Rate: 70.0%	Duration: 828.7 s

Episode 16000	Score: 2.03	Success Rate: 74.0%	Duration: 945.2 s

Episode 18000	Score: 3.21	Success Rate: 81.0%	Duration: 1057.1 s

Episode 20000	Score: 3.04	Success Rate: 80.0%	Duration: 1176.1 s
Success: 69 / 100 (69.0000%)	Steps: 3.9130	Time: 0.9 s	Saved to: test18-03-2023-10:14:42.txt



## States

### StateYesNo

In [36]:
def experiment(answers, guesses, n_episodes, logging_level=None):
    """Train, then test, an agent that observes the game through `StateYesNo`.

    The reward table is fixed to the notebook-level `step_rewards`.

    Args:
        answers: passed to `Wordle` as `answers`.
        guesses: passed to `Wordle` as `vocabulary` and bound into the action constructor.
        n_episodes: number of training episodes given to `train`.
        logging_level: forwarded to `train` unchanged.

    Returns:
        None.
    """
    wordle = Wordle(vocabulary=guesses, answers=answers)
    env = Environment(rewards=step_rewards, wordle=wordle, state_instance=StateYesNo())

    # Network dimensions are read back from the environment that was just built.
    state_size = env.state.size
    n_actions = len(env.wordle.vocabulary)
    make_action = partial(ActionVocabulary, vocabulary=guesses)
    agent = Agent(state_size=state_size, action_size=n_actions, action_constructor=make_action)

    train(env=env, agent=agent, n_episodes=n_episodes, logging_level=logging_level)
    test(env=env, agent=agent)

In [37]:
# 10 answers / 100 guesses, 5000 training episodes.
experiment(answers=answers_10_100, guesses=guesses_10_100, n_episodes=5000)


Episode  500	Score: 0.73	Success Rate: 86.0%	RMSE: 3.427	Time: 18.9 s

Episode 1000	Score: 7.05	Success Rate: 100.0%	RMSE: 3.742	Time: 31.7 s

Episode 1500	Score: 6.86	Success Rate: 100.0%	RMSE: 3.417	Time: 42.4 s

Episode 2000	Score: 7.12	Success Rate: 100.0%	RMSE: 2.557	Time: 52.9 s

Episode 2500	Score: 7.04	Success Rate: 100.0%	RMSE: 2.533	Time: 63.9 s

Episode 3000	Score: 6.55	Success Rate: 99.0%	RMSE: 2.090	Time: 74.8 s

Episode 3500	Score: 7.31	Success Rate: 100.0%	RMSE: 2.267	Time: 85.6 s

Episode 4000	Score: 6.72	Success Rate: 99.0%	RMSE: 2.424	Time: 96.9 s

Episode 4500	Score: 6.42	Success Rate: 99.0%	RMSE: 1.710	Time: 107.9 s

Episode 5000	Score: 6.96	Success Rate: 99.0%	RMSE: 2.202	Time: 118.8 s

Success: 9 / 10 (90.0000%)	Steps: 2.2222	Time: 0.0 s	Saved to: test18-03-2023-10:46:38.txt



In [38]:
# 50 answers / 200 guesses, 20000 training episodes.
experiment(answers=answers_50_200, guesses=guesses_50_200, n_episodes=20000)


Episode 2000	Score: -26.24	Success Rate: 16.0%	RMSE: 4.531	Time: 116.7 s

Episode 4000	Score: -20.15	Success Rate: 33.0%	RMSE: 6.672	Time: 246.8 s

Episode 6000	Score: -15.66	Success Rate: 44.0%	RMSE: 6.629	Time: 355.7 s

Episode 8000	Score: -12.74	Success Rate: 52.0%	RMSE: 6.569	Time: 463.5 s

Episode 10000	Score: -11.04	Success Rate: 58.0%	RMSE: 6.947	Time: 566.7 s

Episode 12000	Score: -6.55	Success Rate: 71.0%	RMSE: 6.467	Time: 664.8 s

Episode 14000	Score: -8.23	Success Rate: 65.0%	RMSE: 6.579	Time: 764.7 s

Episode 16000	Score: -9.31	Success Rate: 65.0%	RMSE: 6.394	Time: 868.0 s

Episode 18000	Score: -6.10	Success Rate: 75.0%	RMSE: 5.482	Time: 966.1 s

Episode 20000	Score: -6.78	Success Rate: 70.0%	RMSE: 6.890	Time: 1062.4 s

Success: 37 / 50 (74.0000%)	Steps: 2.6216	Time: 0.2 s	Saved to: test18-03-2023-11:04:21.txt



### StateVocabulary

In [39]:
def experiment(answers, guesses, n_episodes, logging_level=None):
    """Train, then test, an agent whose state is a `StateVocabulary`.

    The reward table is fixed to the notebook-level `step_rewards`. The state
    is built with a boolean `answers_mask` holding one entry per element of
    `answers`: True when that word also occurs in `guesses`.

    Args:
        answers: passed to `Wordle` as `answers`; also drives the mask.
        guesses: passed to `Wordle` as `vocabulary` and bound into the action constructor.
        n_episodes: number of training episodes given to `train`.
        logging_level: forwarded to `train` unchanged.

    Returns:
        None.
    """
    wordle = Wordle(vocabulary=guesses, answers=answers)

    # NOTE(review): the mask is indexed by `answers`, not by `guesses`, so for
    # answers sampled from guesses it is all True — confirm this is the layout
    # StateVocabulary expects.
    is_guessable = [word in guesses for word in answers]
    state = StateVocabulary(answers_mask=np.array(is_guessable, dtype=bool))

    env = Environment(rewards=step_rewards, wordle=wordle, state_instance=state)

    state_size = env.state.size
    n_actions = len(env.wordle.vocabulary)
    agent = Agent(
        state_size=state_size,
        action_size=n_actions,
        action_constructor=partial(ActionVocabulary, vocabulary=guesses),
    )

    train(env=env, agent=agent, n_episodes=n_episodes, logging_level=logging_level)
    test(env=env, agent=agent)

In [40]:
# 10 answers / 100 guesses, 5000 training episodes.
experiment(answers=answers_10_100, guesses=guesses_10_100, n_episodes=5000)


Episode  500	Score: -10.41	Success Rate: 55.0%	RMSE: 3.617	Time: 23.3 s

Episode 1000	Score: -3.07	Success Rate: 74.0%	RMSE: 3.275	Time: 47.0 s

Episode 1500	Score: 1.41	Success Rate: 86.0%	RMSE: 4.449	Time: 66.5 s

Episode 2000	Score: 4.79	Success Rate: 97.0%	RMSE: 4.595	Time: 82.6 s

Episode 2500	Score: 6.33	Success Rate: 100.0%	RMSE: 4.426	Time: 96.3 s

Episode 3000	Score: 6.39	Success Rate: 100.0%	RMSE: 3.272	Time: 111.4 s

Episode 3500	Score: 6.43	Success Rate: 100.0%	RMSE: 3.361	Time: 124.9 s

Episode 4000	Score: 6.76	Success Rate: 100.0%	RMSE: 3.545	Time: 137.5 s

Episode 4500	Score: 6.23	Success Rate: 98.0%	RMSE: 2.980	Time: 150.2 s

Episode 5000	Score: 6.70	Success Rate: 100.0%	RMSE: 3.502	Time: 163.7 s

Success: 10 / 10 (100.0000%)	Steps: 1.9000	Time: 0.0 s	Saved to: test18-03-2023-11:07:06.txt



In [41]:
# 50 answers / 200 guesses, 20000 training episodes.
experiment(answers=answers_50_200, guesses=guesses_50_200, n_episodes=20000)


Episode 2000	Score: -26.94	Success Rate: 13.0%	RMSE: 3.828	Time: 142.2 s

Episode 4000	Score: -20.92	Success Rate: 29.0%	RMSE: 7.478	Time: 285.8 s

Episode 6000	Score: -14.95	Success Rate: 46.0%	RMSE: 6.442	Time: 420.4 s

Episode 8000	Score: -14.35	Success Rate: 50.0%	RMSE: 6.219	Time: 562.8 s

Episode 10000	Score: -9.74	Success Rate: 61.0%	RMSE: 6.844	Time: 695.1 s

Episode 12000	Score: -7.15	Success Rate: 68.0%	RMSE: 5.755	Time: 847.4 s

Episode 14000	Score: -4.06	Success Rate: 78.0%	RMSE: 6.517	Time: 984.7 s

Episode 16000	Score: -4.96	Success Rate: 76.0%	RMSE: 5.759	Time: 1123.9 s

Episode 18000	Score: -3.10	Success Rate: 80.0%	RMSE: 5.481	Time: 1263.5 s

Episode 20000	Score: -0.34	Success Rate: 89.0%	RMSE: 5.223	Time: 1389.3 s

Success: 42 / 50 (84.0000%)	Steps: 3.0714	Time: 0.3 s	Saved to: test18-03-2023-11:30:16.txt



## Replay Sampling

### Uniform

In [9]:
def experiment(answers, guesses, n_episodes, logging_level=None):
    """Train, then test, a `StateVocabulary` agent with `alpha` set to 0.

    Uses the notebook-level `step_rewards`. The agent gets an explicit
    `priority_rate` with alpha 0, beta 1 and both rate factors at 1; the
    section heading labels this configuration "Uniform".
    NOTE(review): these are presumably the prioritized-replay exponents and
    their schedules — confirm against dqn.agent.

    Args:
        answers: passed to `Wordle` as `answers`; also drives `answers_mask`.
        guesses: passed to `Wordle` as `vocabulary` and bound into the action constructor.
        n_episodes: number of training episodes given to `train`.
        logging_level: forwarded to `train` unchanged.

    Returns:
        None.
    """
    uniform_sampling = dict(alpha=0, alpha_decay_rate=1, beta=1, beta_growth_rate=1)

    wordle = Wordle(vocabulary=guesses, answers=answers)
    # One boolean per answer: True when that answer is also in `guesses`.
    membership = (candidate in guesses for candidate in answers)
    state = StateVocabulary(answers_mask=np.fromiter(membership, dtype=bool))
    env = Environment(rewards=step_rewards, wordle=wordle, state_instance=state)

    agent = Agent(
        state_size=env.state.size,
        action_size=len(env.wordle.vocabulary),
        action_constructor=partial(ActionVocabulary, vocabulary=guesses),
        priority_rate=uniform_sampling,
    )

    train(env=env, agent=agent, n_episodes=n_episodes, logging_level=logging_level)
    test(env=env, agent=agent)

In [13]:
# 10 answers / 100 guesses, 5000 training episodes.
experiment(answers=answers_10_100, guesses=guesses_10_100, n_episodes=5000)


Episode  500	Score: -9.68	Success Rate: 58.0%	RMSE: 2.453	Time: 41.7 s

Episode 1000	Score: 1.96	Success Rate: 88.0%	RMSE: 2.607	Time: 75.1 s

Episode 1500	Score: 5.31	Success Rate: 97.0%	RMSE: 1.837	Time: 107.3 s

Episode 2000	Score: 6.52	Success Rate: 100.0%	RMSE: 2.177	Time: 140.6 s

Episode 2500	Score: 6.00	Success Rate: 99.0%	RMSE: 2.706	Time: 166.7 s

Episode 3000	Score: 6.65	Success Rate: 100.0%	RMSE: 2.426	Time: 189.5 s

Episode 3500	Score: 5.16	Success Rate: 98.0%	RMSE: 1.760	Time: 216.4 s

Episode 4000	Score: 4.33	Success Rate: 96.0%	RMSE: 2.397	Time: 243.7 s

Episode 4500	Score: 6.83	Success Rate: 100.0%	RMSE: 1.842	Time: 269.6 s

Episode 5000	Score: 6.31	Success Rate: 100.0%	RMSE: 1.724	Time: 297.9 s

Success: 10 / 10 (100.0000%)	Steps: 2.0000	Time: 0.1 s	Saved to: test18-03-2023-20:32:59.txt



In [14]:
# 100 answers / 100 guesses, 20000 training episodes.
experiment(answers=answers_100_100, guesses=guesses_100_100, n_episodes=20000)


Episode 2000	Score: -30.22	Success Rate: 6.0%	RMSE: 3.373	Time: 270.4 s

Episode 4000	Score: -26.53	Success Rate: 15.0%	RMSE: 3.897	Time: 527.2 s

Episode 6000	Score: -24.81	Success Rate: 19.0%	RMSE: 4.236	Time: 776.6 s

Episode 8000	Score: -19.07	Success Rate: 35.0%	RMSE: 6.821	Time: 1016.4 s

Episode 10000	Score: -14.18	Success Rate: 48.0%	RMSE: 5.566	Time: 1310.4 s

Episode 12000	Score: -16.22	Success Rate: 44.0%	RMSE: 6.003	Time: 1580.5 s

Episode 14000	Score: -15.15	Success Rate: 46.0%	RMSE: 6.170	Time: 1801.1 s

Episode 16000	Score: -9.51	Success Rate: 63.0%	RMSE: 4.458	Time: 2010.0 s

Episode 18000	Score: -8.50	Success Rate: 65.0%	RMSE: 4.228	Time: 2224.4 s

Episode 20000	Score: -8.41	Success Rate: 64.0%	RMSE: 4.630	Time: 2454.0 s

Success: 67 / 100 (67.0000%)	Steps: 2.7612	Time: 1.1 s	Saved to: test18-03-2023-21:14:48.txt



### A Little Prioritized

In [15]:
def experiment(answers, guesses, n_episodes, logging_level=None):
    """Train, then test, a `StateYesNo` agent with `alpha` = 0.3.

    Uses the notebook-level `step_rewards`. `priority_rate` is alpha 0.3 with
    `alpha_decay_rate` 0.95; beta and `beta_growth_rate` are both 1.
    NOTE(review): the "Uniform" variant earlier in this notebook builds its
    state with `StateVocabulary` while this one uses `StateYesNo`, so the two
    differ in more than the sampling settings — confirm that is intended.

    Args:
        answers: passed to `Wordle` as `answers`.
        guesses: passed to `Wordle` as `vocabulary` and bound into the action constructor.
        n_episodes: number of training episodes given to `train`.
        logging_level: forwarded to `train` unchanged.

    Returns:
        None.
    """
    wordle = Wordle(vocabulary=guesses, answers=answers)
    env = Environment(rewards=step_rewards, wordle=wordle, state_instance=StateYesNo())

    agent_settings = {
        'state_size': env.state.size,
        'action_size': len(env.wordle.vocabulary),
        'action_constructor': partial(ActionVocabulary, vocabulary=guesses),
        'priority_rate': {
            'alpha': 0.3,
            'alpha_decay_rate': 0.95,
            'beta': 1,
            'beta_growth_rate': 1,
        },
    }
    agent = Agent(**agent_settings)

    train(env=env, agent=agent, n_episodes=n_episodes, logging_level=logging_level)
    test(env=env, agent=agent)

In [23]:
# 10 answers / 100 guesses, 5000 training episodes.
experiment(answers=answers_10_100, guesses=guesses_10_100, n_episodes=5000)


Episode  500	Score: -11.40	Success Rate: 53.0%	RMSE: 2.376	Time: 23.7 s

Episode 1000	Score: -3.35	Success Rate: 75.0%	RMSE: 3.608	Time: 45.3 s

Episode 1500	Score: 4.86	Success Rate: 94.0%	RMSE: 3.738	Time: 64.3 s

Episode 2000	Score: 7.51	Success Rate: 100.0%	RMSE: 2.834	Time: 75.9 s

Episode 2500	Score: 6.28	Success Rate: 99.0%	RMSE: 2.660	Time: 91.0 s

Episode 3000	Score: 6.97	Success Rate: 100.0%	RMSE: 3.216	Time: 105.8 s

Episode 3500	Score: 6.88	Success Rate: 100.0%	RMSE: 3.262	Time: 117.1 s

Episode 4000	Score: 6.71	Success Rate: 100.0%	RMSE: 3.361	Time: 128.6 s

Episode 4500	Score: 7.18	Success Rate: 100.0%	RMSE: 2.358	Time: 142.9 s

Episode 5000	Score: 6.70	Success Rate: 100.0%	RMSE: 2.715	Time: 156.1 s

Success: 10 / 10 (100.0000%)	Steps: 2.0000	Time: 0.0 s	Saved to: test18-03-2023-19:32:50.txt



In [16]:
# 100 answers / 100 guesses, 20000 training episodes.
experiment(answers=answers_100_100, guesses=guesses_100_100, n_episodes=20000)


Episode 2000	Score: -27.83	Success Rate: 10.4%	RMSE: 2.864	Time: 239.2 s

Episode 4000	Score: -23.79	Success Rate: 22.6%	RMSE: 5.163	Time: 460.9 s

Episode 6000	Score: -19.49	Success Rate: 34.5%	RMSE: 3.610	Time: 677.2 s

Episode 8000	Score: -16.73	Success Rate: 42.4%	RMSE: 4.134	Time: 875.0 s

Episode 10000	Score: -14.85	Success Rate: 48.6%	RMSE: 4.203	Time: 1071.5 s

Episode 12000	Score: -12.74	Success Rate: 54.7%	RMSE: 5.986	Time: 1267.0 s

Episode 14000	Score: -10.74	Success Rate: 61.1%	RMSE: 6.155	Time: 1465.9 s

Episode 16000	Score: -7.93	Success Rate: 68.3%	RMSE: 4.236	Time: 1649.1 s

Episode 18000	Score: -6.55	Success Rate: 72.4%	RMSE: 3.902	Time: 1869.4 s

Episode 20000	Score: -3.46	Success Rate: 79.5%	RMSE: 4.418	Time: 2034.2 s

Success: 80 / 100 (80.0000%)	Steps: 2.8875	Time: 1.0 s	Saved to: test18-03-2023-22:03:42.txt



### Middle Prioritized

In [19]:
def experiment(answers, guesses, n_episodes, logging_level=None):
    """Train, then test, a `StateYesNo` agent with `alpha` = 0.6.

    Uses the notebook-level `step_rewards`. `priority_rate` is alpha 0.6 with
    `alpha_decay_rate` 0.95; beta and `beta_growth_rate` are both 1.
    NOTE(review): the "Uniform" variant earlier in this notebook uses
    `StateVocabulary`, not `StateYesNo` — confirm the comparison is intended.

    Args:
        answers: passed to `Wordle` as `answers`.
        guesses: passed to `Wordle` as `vocabulary` and bound into the action constructor.
        n_episodes: number of training episodes given to `train`.
        logging_level: forwarded to `train` unchanged.

    Returns:
        None.
    """
    priority = dict(alpha=0.6, alpha_decay_rate=0.95, beta=1, beta_growth_rate=1)

    game = Wordle(vocabulary=guesses, answers=answers)
    yes_no_state = StateYesNo()
    env = Environment(rewards=step_rewards, wordle=game, state_instance=yes_no_state)

    input_size = env.state.size
    output_size = len(env.wordle.vocabulary)
    agent = Agent(
        state_size=input_size,
        action_size=output_size,
        action_constructor=partial(ActionVocabulary, vocabulary=guesses),
        priority_rate=priority,
    )

    train(env=env, agent=agent, n_episodes=n_episodes, logging_level=logging_level)
    test(env=env, agent=agent)

In [20]:
# 10 answers / 100 guesses, 5000 training episodes.
experiment(answers=answers_10_100, guesses=guesses_10_100, n_episodes=5000)


Episode  500	Score: -16.45	Success Rate: 40.6%	RMSE: 3.893	Time: 36.5 s

Episode 1000	Score: -3.55	Success Rate: 74.2%	RMSE: 3.556	Time: 74.6 s

Episode 1500	Score: 2.54	Success Rate: 90.2%	RMSE: 3.762	Time: 106.0 s

Episode 2000	Score: 2.61	Success Rate: 90.2%	RMSE: 4.109	Time: 135.4 s

Episode 2500	Score: 4.88	Success Rate: 96.6%	RMSE: 4.043	Time: 156.9 s

Episode 3000	Score: 6.20	Success Rate: 99.4%	RMSE: 3.444	Time: 177.7 s

Episode 3500	Score: 6.82	Success Rate: 99.4%	RMSE: 3.496	Time: 199.2 s

Episode 4000	Score: 6.61	Success Rate: 99.0%	RMSE: 3.239	Time: 219.0 s

Episode 4500	Score: 7.22	Success Rate: 99.8%	RMSE: 3.302	Time: 237.5 s

Episode 5000	Score: 6.73	Success Rate: 98.8%	RMSE: 2.657	Time: 256.3 s

Success: 10 / 10 (100.0000%)	Steps: 2.0000	Time: 0.0 s	Saved to: test18-03-2023-22:15:39.txt



In [21]:
# 100 answers / 100 guesses, 20000 training episodes.
experiment(answers=answers_100_100, guesses=guesses_100_100, n_episodes=20000)


Episode 2000	Score: -26.94	Success Rate: 12.7%	RMSE: 4.490	Time: 180.0 s

Episode 4000	Score: -22.83	Success Rate: 25.2%	RMSE: 6.281	Time: 362.7 s

Episode 6000	Score: -19.34	Success Rate: 35.4%	RMSE: 5.401	Time: 538.6 s

Episode 8000	Score: -16.82	Success Rate: 42.3%	RMSE: 5.463	Time: 709.8 s

Episode 10000	Score: -13.79	Success Rate: 51.1%	RMSE: 5.431	Time: 873.1 s

Episode 12000	Score: -11.37	Success Rate: 57.9%	RMSE: 5.457	Time: 1032.5 s

Episode 14000	Score: -9.88	Success Rate: 63.8%	RMSE: 5.039	Time: 1185.2 s

Episode 16000	Score: -7.72	Success Rate: 68.3%	RMSE: 6.175	Time: 1332.8 s

Episode 18000	Score: -7.01	Success Rate: 71.3%	RMSE: 5.104	Time: 1477.9 s

Episode 20000	Score: -3.82	Success Rate: 78.8%	RMSE: 6.014	Time: 1609.8 s

Success: 81 / 100 (81.0000%)	Steps: 2.8642	Time: 0.6 s	Saved to: test18-03-2023-22:42:30.txt



### Much Prioritized

In [22]:
def experiment(answers, guesses, n_episodes, logging_level=None):
    """Train, then test, a `StateYesNo` agent with `alpha` = 0.9.

    Uses the notebook-level `step_rewards`. `priority_rate` is alpha 0.9 with
    `alpha_decay_rate` 0.95; beta and `beta_growth_rate` are both 1.
    NOTE(review): the "Uniform" variant earlier in this notebook uses
    `StateVocabulary`, not `StateYesNo` — confirm the comparison is intended.

    Args:
        answers: passed to `Wordle` as `answers`.
        guesses: passed to `Wordle` as `vocabulary` and bound into the action constructor.
        n_episodes: number of training episodes given to `train`.
        logging_level: forwarded to `train` unchanged.

    Returns:
        None.
    """
    env = Environment(
        rewards=step_rewards,
        wordle=Wordle(answers=answers, vocabulary=guesses),
        state_instance=StateYesNo(),
    )

    action_factory = partial(ActionVocabulary, vocabulary=guesses)
    agent = Agent(
        state_size=env.state.size,
        action_size=len(env.wordle.vocabulary),
        action_constructor=action_factory,
        priority_rate={'alpha': 0.9, 'alpha_decay_rate': 0.95, 'beta': 1, 'beta_growth_rate': 1},
    )

    # Train first, then evaluate the very same agent/environment pair.
    shared = {'env': env, 'agent': agent}
    train(**shared, n_episodes=n_episodes, logging_level=logging_level)
    test(**shared)

In [23]:
# 10 answers / 100 guesses, 5000 training episodes.
experiment(answers=answers_10_100, guesses=guesses_10_100, n_episodes=5000)


Episode  500	Score: -18.32	Success Rate: 35.0%	RMSE: 4.363	Time: 29.7 s

Episode 1000	Score: -7.21	Success Rate: 63.4%	RMSE: 5.846	Time: 59.6 s

Episode 1500	Score: -0.90	Success Rate: 82.0%	RMSE: 5.946	Time: 84.2 s

Episode 2000	Score: 2.70	Success Rate: 89.6%	RMSE: 4.664	Time: 107.5 s

Episode 2500	Score: 2.01	Success Rate: 88.0%	RMSE: 5.397	Time: 135.8 s

Episode 3000	Score: 1.45	Success Rate: 88.2%	RMSE: 4.961	Time: 163.6 s

Episode 3500	Score: 4.13	Success Rate: 94.2%	RMSE: 6.029	Time: 185.8 s

Episode 4000	Score: 5.85	Success Rate: 98.8%	RMSE: 4.352	Time: 207.2 s

Episode 4500	Score: 6.71	Success Rate: 99.8%	RMSE: 4.008	Time: 227.6 s

Episode 5000	Score: 6.74	Success Rate: 99.4%	RMSE: 3.625	Time: 248.4 s

Success: 10 / 10 (100.0000%)	Steps: 2.1000	Time: 0.0 s	Saved to: test18-03-2023-22:46:40.txt



In [24]:
# 100 answers / 100 guesses, 20000 training episodes.
experiment(answers=answers_100_100, guesses=guesses_100_100, n_episodes=20000)


Episode 2000	Score: -27.04	Success Rate: 12.8%	RMSE: 5.135	Time: 223.6 s

Episode 4000	Score: -23.47	Success Rate: 23.6%	RMSE: 6.813	Time: 461.5 s

Episode 6000	Score: -19.67	Success Rate: 34.3%	RMSE: 6.054	Time: 701.8 s

Episode 8000	Score: -16.94	Success Rate: 42.5%	RMSE: 5.090	Time: 954.2 s

Episode 10000	Score: -14.11	Success Rate: 50.8%	RMSE: 5.346	Time: 1190.2 s

Episode 12000	Score: -10.90	Success Rate: 60.1%	RMSE: 5.577	Time: 1404.1 s

Episode 14000	Score: -8.88	Success Rate: 66.3%	RMSE: 5.411	Time: 1621.0 s

Episode 16000	Score: -5.81	Success Rate: 73.9%	RMSE: 4.778	Time: 1812.3 s

Episode 18000	Score: -5.17	Success Rate: 76.3%	RMSE: 4.167	Time: 1970.6 s

Episode 20000	Score: -5.18	Success Rate: 76.1%	RMSE: 5.223	Time: 2164.0 s

Success: 80 / 100 (80.0000%)	Steps: 2.8250	Time: 0.8 s	Saved to: test18-03-2023-23:22:46.txt



## Steps

Здесь используем оптимальные параметры с предыдущих экспериментов.

### 6 on train, 6 on test

In [30]:
def experiment(answers, guesses, n_episodes, logging_level=None):
    """Baseline for the step-budget study: same guess limit for train and test.

    No `max_guesses` is passed to `Wordle`, so its own default applies
    (the section heading implies 6 — TODO confirm). Rewards are the
    notebook-level `step_rewards`; the agent is built without `priority_rate`.

    Args:
        answers: passed to `Wordle` as `answers`.
        guesses: passed to `Wordle` as `vocabulary`; its length is the agent's
            `action_size`, and it is bound into the action constructor.
        n_episodes: number of training episodes given to `train`.
        logging_level: forwarded to `train` unchanged.

    Returns:
        None.
    """
    game = Wordle(vocabulary=guesses, answers=answers)
    env = Environment(rewards=step_rewards, wordle=game, state_instance=StateYesNo())

    agent = Agent(
        state_size=env.state.size,
        action_size=len(guesses),
        action_constructor=partial(ActionVocabulary, vocabulary=guesses),
    )

    train(env=env, agent=agent, n_episodes=n_episodes, logging_level=logging_level)
    test(env=env, agent=agent)

In [31]:
# 10 answers / 100 guesses, 5000 training episodes.
experiment(answers=answers_10_100, guesses=guesses_10_100, n_episodes=5000)


Episode  500	Score: -20.55	Success Rate: 28.8%	RMSE: 3.120	Time: 42.7 s

Episode 1000	Score: -6.95	Success Rate: 65.8%	RMSE: 4.944	Time: 77.1 s

Episode 1500	Score: -3.61	Success Rate: 73.8%	RMSE: 5.414	Time: 112.8 s

Episode 2000	Score: -0.14	Success Rate: 83.6%	RMSE: 5.784	Time: 139.9 s

Episode 2500	Score: 1.78	Success Rate: 88.6%	RMSE: 5.940	Time: 166.8 s

Episode 3000	Score: 2.66	Success Rate: 91.2%	RMSE: 6.007	Time: 190.3 s

Episode 3500	Score: 3.33	Success Rate: 93.2%	RMSE: 4.608	Time: 215.3 s

Episode 4000	Score: 5.88	Success Rate: 99.6%	RMSE: 4.237	Time: 240.8 s

Episode 4500	Score: 6.06	Success Rate: 100.0%	RMSE: 5.285	Time: 262.4 s

Episode 5000	Score: 5.89	Success Rate: 99.6%	RMSE: 3.987	Time: 291.7 s

Success: 10 / 10 (100.0000%)	Steps: 2.1000	Time: 0.1 s	Saved to: test18-03-2023-23:34:24.txt



In [32]:
# 100 answers / 100 guesses, 20000 training episodes.
experiment(answers=answers_100_100, guesses=guesses_100_100, n_episodes=20000)


Episode 2000	Score: -27.22	Success Rate: 11.8%	RMSE: 5.782	Time: 221.3 s

Episode 4000	Score: -22.28	Success Rate: 25.7%	RMSE: 6.369	Time: 428.8 s

Episode 6000	Score: -19.03	Success Rate: 34.8%	RMSE: 8.823	Time: 638.6 s

Episode 8000	Score: -17.03	Success Rate: 40.4%	RMSE: 5.728	Time: 847.6 s

Episode 10000	Score: -12.76	Success Rate: 52.8%	RMSE: 6.141	Time: 1028.8 s

Episode 12000	Score: -9.30	Success Rate: 62.5%	RMSE: 6.190	Time: 1209.7 s

Episode 14000	Score: -8.06	Success Rate: 65.8%	RMSE: 7.095	Time: 1391.7 s

Episode 16000	Score: -6.87	Success Rate: 69.0%	RMSE: 5.983	Time: 1560.1 s

Episode 18000	Score: -6.08	Success Rate: 71.4%	RMSE: 6.317	Time: 1726.3 s

Episode 20000	Score: -5.44	Success Rate: 74.0%	RMSE: 5.949	Time: 1908.5 s

Success: 76 / 100 (76.0000%)	Steps: 2.7763	Time: 0.7 s	Saved to: test19-03-2023-00:06:14.txt



### 10 on train, 6 on test

In [33]:
def experiment(answers, guesses, n_episodes, logging_level=None):
    """Train with a 10-guess limit, then test with the limit lowered to 6.

    `Wordle` is created with `max_guesses=10`; after `train` returns, the same
    game object's `max_guesses` is overwritten with 6 before `test` runs.
    Rewards are the notebook-level `step_rewards`.

    Args:
        answers: passed to `Wordle` as `answers`.
        guesses: passed to `Wordle` as `vocabulary`; its length is the agent's
            `action_size`, and it is bound into the action constructor.
        n_episodes: number of training episodes given to `train`.
        logging_level: forwarded to `train` unchanged.

    Returns:
        None.
    """
    game = Wordle(vocabulary=guesses, answers=answers, max_guesses=10)
    env = Environment(rewards=step_rewards, wordle=game, state_instance=StateYesNo())

    make_action = partial(ActionVocabulary, vocabulary=guesses)
    agent = Agent(
        state_size=env.state.size,
        action_size=len(guesses),
        action_constructor=make_action,
    )

    train(env=env, agent=agent, n_episodes=n_episodes, logging_level=logging_level)

    # Evaluation uses the tighter limit on the very same environment.
    env.wordle.max_guesses = 6
    test(env=env, agent=agent)

In [34]:
# 10 answers / 100 guesses, 5000 training episodes.
experiment(answers=answers_10_100, guesses=guesses_10_100, n_episodes=5000)


Episode  500	Score: -14.71	Success Rate: 66.2%	RMSE: 4.282	Time: 43.8 s

Episode 1000	Score: 6.48	Success Rate: 100.0%	RMSE: 4.394	Time: 62.4 s

Episode 1500	Score: 6.56	Success Rate: 100.0%	RMSE: 3.570	Time: 79.7 s

Episode 2000	Score: 6.67	Success Rate: 100.0%	RMSE: 3.498	Time: 97.1 s

Episode 2500	Score: 6.40	Success Rate: 99.6%	RMSE: 3.301	Time: 115.2 s

Episode 3000	Score: 6.29	Success Rate: 99.4%	RMSE: 2.979	Time: 141.0 s

Episode 3500	Score: 6.08	Success Rate: 99.4%	RMSE: 2.922	Time: 162.3 s

Episode 4000	Score: 6.09	Success Rate: 99.2%	RMSE: 2.255	Time: 180.9 s

Episode 4500	Score: 6.35	Success Rate: 99.4%	RMSE: 3.233	Time: 197.7 s

Episode 5000	Score: 6.49	Success Rate: 99.8%	RMSE: 2.848	Time: 214.9 s

Success: 10 / 10 (100.0000%)	Steps: 2.0000	Time: 0.0 s	Saved to: test19-03-2023-00:09:52.txt



In [35]:
# 100 answers / 100 guesses, 20000 training episodes.
experiment(answers=answers_100_100, guesses=guesses_100_100, n_episodes=20000)


Episode 2000	Score: -41.13	Success Rate: 18.9%	RMSE: 6.341	Time: 311.9 s

Episode 4000	Score: -36.80	Success Rate: 27.4%	RMSE: 5.775	Time: 628.7 s

Episode 6000	Score: -30.66	Success Rate: 39.1%	RMSE: 6.762	Time: 964.9 s

Episode 8000	Score: -24.20	Success Rate: 51.4%	RMSE: 5.983	Time: 1238.5 s

Episode 10000	Score: -18.75	Success Rate: 61.3%	RMSE: 4.822	Time: 1505.7 s

Episode 12000	Score: -15.00	Success Rate: 68.2%	RMSE: 4.803	Time: 1760.4 s

Episode 14000	Score: -14.36	Success Rate: 69.7%	RMSE: 6.252	Time: 2009.4 s

Episode 16000	Score: -10.59	Success Rate: 76.5%	RMSE: 4.842	Time: 2236.1 s
wha


UnboundLocalError: local variable 'states' referenced before assignment

### 15 on train, 6 on test

In [None]:
def experiment(answers, guesses, n_episodes, logging_level=None):
    """Train with a 15-guess limit, then test with the limit lowered to 6.

    `Wordle` is created with `max_guesses=15`; once `train` has returned, the
    same game object's `max_guesses` is set to 6 before `test` is called.
    Rewards are the notebook-level `step_rewards`.

    Args:
        answers: passed to `Wordle` as `answers`.
        guesses: passed to `Wordle` as `vocabulary` and bound into the action constructor.
        n_episodes: number of training episodes given to `train`.
        logging_level: forwarded to `train` unchanged.

    Returns:
        None.
    """
    wordle = Wordle(vocabulary=guesses, answers=answers, max_guesses=15)
    state = StateYesNo()
    env = Environment(rewards=step_rewards, wordle=wordle, state_instance=state)

    state_size = env.state.size
    n_actions = len(env.wordle.vocabulary)
    agent = Agent(
        state_size=state_size,
        action_size=n_actions,
        action_constructor=partial(ActionVocabulary, vocabulary=guesses),
    )

    train(env=env, agent=agent, n_episodes=n_episodes, logging_level=logging_level)

    # Switch the shared game to the 6-guess limit for evaluation.
    env.wordle.max_guesses = 6
    test(env=env, agent=agent)

In [None]:
# 10 answers / 100 guesses, 5000 training episodes.
experiment(answers=answers_10_100, guesses=guesses_10_100, n_episodes=5000)

In [None]:
# 100 answers / 100 guesses, 20000 training episodes.
experiment(answers=answers_100_100, guesses=guesses_100_100, n_episodes=20000)

### Stages on the number of steps

Появилась идея: сначала обучить на 6, затем на 10, и наконец на 15.

In [37]:
def steps_stage_train(answers, guesses, episodes_counts:dict):
    """Train one agent in consecutive stages that differ in the guess limit.

    A single environment and agent are created once. For each
    `max_guesses -> n_episodes` entry of `episodes_counts` (in the dict's
    iteration order) the game's `max_guesses` is set to the key, `train` runs
    for the given number of episodes, then `max_guesses` is set to 6 and
    `test` is called.

    Args:
        answers: passed to `Wordle` as `answers`.
        guesses: passed to `Wordle` as `vocabulary`; its length is the agent's
            `action_size`, and it is bound into the action constructor.
        episodes_counts: mapping from guess limit to training episode count.

    Returns:
        None.
    """
    game = Wordle(vocabulary=guesses, answers=answers)
    env = Environment(rewards=step_rewards, wordle=game, state_instance=StateYesNo())

    agent = Agent(
        state_size=env.state.size,
        action_size=len(guesses),
        action_constructor=partial(ActionVocabulary, vocabulary=guesses),
    )

    for guess_limit, stage_episodes in episodes_counts.items():
        # Train under this stage's limit ...
        env.wordle.max_guesses = guess_limit
        train(env=env, agent=agent, n_episodes=stage_episodes)
        # ... but always evaluate with 6 guesses.
        env.wordle.max_guesses = 6
        test(env=env, agent=agent)

In [None]:
# Stage schedule: guess limit -> training episodes, run in insertion order.
episodes_counts = {6: 15000, 10: 4000, 15: 1000}
steps_stage_train(
    answers=answers_100_100,
    guesses=guesses_100_100,
    episodes_counts=episodes_counts,
)

## Stages on answers list size

Фиксированный guesses размера 100. Меняющийся answers по порядку: 10, 40, 70, 100.

In [None]:
def answer_stage_train(
    guesses, start_size, end_size, n_stages, episodes_counts
):
  """Train one agent in stages while the answer list grows.

  The guess vocabulary stays fixed. Stage `i` uses the first `size` words of
  `guesses` as answers, where the sizes are `n_stages` evenly spaced values
  from `start_size` to `end_size`, rounded to integers (for example
  10, 40, 70, 100 for start 10, end 100 and 4 stages). With a single stage
  the size is `start_size`. After each stage the agent is tested on that
  stage's environment and a separator line is printed.

  Args:
    guesses: full guess vocabulary; stage answers are its leading slice.
    start_size: number of answers in the first stage.
    end_size: number of answers in the last stage.
    n_stages: number of training stages.
    episodes_counts: indexable by stage number; `episodes_counts[i]` is the
      number of training episodes for stage `i`.

  Raises:
    ValueError: if `episodes_counts` has fewer entries than `n_stages`
      (checked up front so a long run cannot fail at its last stage).
  """
  if len(episodes_counts) < n_stages:
    raise ValueError(
      f'episodes_counts has {len(episodes_counts)} entries, need {n_stages}'
    )

  # The original hard-coded "+ 100" for the 100-word guess list.
  # NOTE(review): assumes StateVocabulary adds one feature per vocabulary
  # word on top of StateYesNo — confirm in environment.environment.
  agent = Agent(
    state_size=StateYesNo().size + len(guesses),
    action_size=len(guesses),
    action_constructor=partial(ActionVocabulary, vocabulary=guesses),
  )

  # Integer sizes are required for slicing; linspace also handles the
  # single-stage case, which used to divide by zero.
  stage_sizes = np.linspace(start_size, end_size, n_stages).round().astype(int)

  for i, size in enumerate(stage_sizes.tolist()):
    answers = guesses[:size]

    env = Environment(
      rewards=step_rewards,
      wordle=Wordle(
          vocabulary=guesses,
          answers=answers
      ),
      # NOTE(review): this mask has one entry per answer (all True, since
      # answers are a slice of guesses), so its length changes per stage
      # while the agent's state_size is fixed — confirm StateVocabulary
      # does not expect a mask over `guesses` instead.
      state_instance=StateVocabulary(
          answers_mask=np.fromiter((ans in guesses for ans in answers), dtype=bool)
      )
    )

    train(env=env, agent=agent, n_episodes=episodes_counts[i])
    test(env=env, agent=agent)
    print('\n============================')