# Testcase

In [19]:
%load_ext autoreload
%autoreload 2

from functools import partial

from wordle.wordlenp import Wordle
from environment.environment import Environment, StateYesNo, StateVocabulary, ActionVocabulary
from dqn.agent import Agent
from dqn.train import train, test

import torch
import numpy as np
np.random.seed(0)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Датасеты

In [20]:
# Full word list read from 'wordle/guesses.txt' via Wordle's private loader.
# Presumably returned as a NumPy array because of astype=np.array — TODO confirm.
word_list = Wordle._load_vocabulary('wordle/guesses.txt', astype=np.array)

def make_data(n_answers, n_guesses, vocabulary=None):
    """Sample a random sub-problem: a guess list and an answer list inside it.

    First draws ``n_guesses`` distinct words from the vocabulary, then draws
    ``n_answers`` distinct words from those guesses, so every answer is also
    a legal guess. Sampling goes through the global NumPy random state, so
    the result depends on the seed and on every draw made before this call.

    Args:
        n_answers: Number of answer words to draw (must be <= ``n_guesses``).
        n_guesses: Number of guess words to draw (must be <= vocabulary size).
        vocabulary: Optional 1-D sequence of words to sample from. Defaults
            to the notebook-level ``word_list``.

    Returns:
        Tuple ``(answers, guesses)`` of NumPy arrays.

    Raises:
        ValueError: If the requested sizes cannot be satisfied when sampling
            without replacement.
    """
    if vocabulary is None:
        # Resolved at call time so the notebook global can be reloaded freely.
        vocabulary = word_list

    # Fail early with a readable message instead of NumPy's generic
    # "Cannot take a larger sample than population" error.
    if n_guesses > len(vocabulary):
        raise ValueError(
            f"n_guesses={n_guesses} exceeds vocabulary size {len(vocabulary)}"
        )
    if n_answers > n_guesses:
        raise ValueError(
            f"n_answers={n_answers} exceeds n_guesses={n_guesses}"
        )

    guesses = np.random.choice(vocabulary, size=n_guesses, replace=False)
    answers = np.random.choice(guesses, size=n_answers, replace=False)
    return answers, guesses

### 10 answers, 100 guesses

In [21]:
# Draw 100 guess words and, from them, 10 answers (see make_data).
# make_data uses the global NumPy RNG, so this draw depends on the seed and
# on the order in which the dataset cells are executed.
answers_10_100, guesses_10_100 = make_data(10, 100)
print(answers_10_100)

['abmho' 'cites' 'aware' 'blays' 'acker' 'rawin' 'anile' 'eorls' 'feers'
 'sadza']


### 100 answers, 100 guesses

In [22]:
# Draw 100 guess words and use all 100 of them (in shuffled order) as answers.
# Depends on the global NumPy RNG state left by the previous dataset cells.
answers_100_100, guesses_100_100 = make_data(100, 100)
print(answers_100_100)

['lilac' 'orles' 'twirl' 'unled' 'sings' 'grind' 'sheaf' 'benny' 'slews'
 'karst' 'rimus' 'lossy' 'joker' 'leash' 'scopa' 'viols' 'giron' 'raiks'
 'lummy' 'renig' 'tinds' 'infos' 'logon' 'drill' 'gudes' 'ammon' 'bhoot'
 'hurry' 'noils' 'coven' 'beryl' 'margs' 'sorbo' 'momes' 'scald' 'potch'
 'flows' 'torus' 'prill' 'scuts' 'brith' 'tamin' 'sewar' 'joram' 'aldol'
 'hazel' 'texes' 'sibbs' 'truth' 'spoil' 'hames' 'actin' 'maces' 'rayas'
 'thuya' 'sugan' 'felly' 'newsy' 'bolos' 'mimeo' 'chems' 'dicty' 'liefs'
 'scuff' 'burps' 'abyes' 'zones' 'cuspy' 'kerve' 'haith' 'amino' 'zygal'
 'kokum' 'zambo' 'icier' 'piers' 'sambo' 'laden' 'barge' 'solei' 'mauts'
 'groat' 'pearl' 'curse' 'jujus' 'troop' 'bilge' 'sibyl' 'gassy' 'elain'
 'daube' 'feyly' 'duals' 'hoper' 'hains' 'beige' 'poove' 'miffy' 'lesbo'
 'dawds']


### 50 answers, 200 guesses

In [23]:
# Draw 200 guess words and, from them, 50 answers.
# Depends on the global NumPy RNG state left by the previous dataset cells.
answers_50_200, guesses_50_200 = make_data(50, 200)
print(answers_50_200)

['toran' 'burka' 'umpie' 'brock' 'civic' 'beige' 'hoiks' 'biffo' 'nagas'
 'sheol' 'malls' 'matzo' 'peeve' 'deshi' 'mooli' 'scaud' 'ameba' 'wadds'
 'bayts' 'glees' 'kaput' 'bitos' 'comae' 'dosed' 'rabis' 'neats' 'tutti'
 'stays' 'smoky' 'chase' 'resaw' 'simas' 'sowne' 'rorid' 'rebec' 'deawy'
 'hinny' 'sores' 'cerge' 'yogas' 'fouet' 'wheel' 'sowfs' 'talus' 'yabas'
 'topee' 'sabin' 'unbox' 'dyers' 'qophs']


### 200 answers, 200 guesses

In [24]:
# Draw 200 guess words and use all 200 of them (in shuffled order) as answers.
# Depends on the global NumPy RNG state left by the previous dataset cells.
answers_200_200, guesses_200_200 = make_data(200, 200)
print(answers_200_200)

['unled' 'shirk' 'horks' 'veena' 'winna' 'amate' 'scape' 'sunup' 'dukka'
 'alway' 'typey' 'sated' 'sepic' 'vagal' 'dangs' 'bitts' 'giust' 'amice'
 'roper' 'molys' 'moira' 'rosed' 'salut' 'cling' 'moles' 'azuki' 'chelp'
 'lurve' 'mavin' 'scath' 'empty' 'stele' 'deoxy' 'zuzim' 'roupy' 'briar'
 'anode' 'chest' 'moola' 'south' 'grimy' 'chats' 'joint' 'teuch' 'ticca'
 'drouk' 'shops' 'fleer' 'olent' 'mucus' 'tryps' 'tices' 'broil' 'thema'
 'rangy' 'ceric' 'jelly' 'ohmic' 'repay' 'drops' 'wills' 'aulos' 'hoied'
 'trefa' 'strut' 'nutsy' 'twoer' 'anomy' 'adhan' 'ingot' 'hooey' 'sluts'
 'sicks' 'cotes' 'bodge' 'weber' 'agism' 'texas' 'tommy' 'plumy' 'amido'
 'cobbs' 'mamma' 'repot' 'whist' 'bloat' 'welts' 'flexo' 'loves' 'hussy'
 'sower' 'noahs' 'roses' 'wents' 'chase' 'manul' 'muons' 'silds' 'yacka'
 'okapi' 'maqui' 'mbira' 'ponga' 'romal' 'schul' 'sonny' 'coram' 'hived'
 'lupus' 'segol' 'kybos' 'birch' 'leeze' 'golly' 'yokes' 'pulse' 'onely'
 'curve' 'bonds' 'cosey' 'lemon' 'cruds' 'bachs' 't

### 100 answers, 2000 guesses

In [25]:
# Draw 2000 guess words and, from them, 100 answers.
# Depends on the global NumPy RNG state left by the previous dataset cells.
answers_100_2000, guesses_100_2000 = make_data(100, 2000)
print(answers_100_2000)

['layed' 'blady' 'welks' 'seame' 'ahing' 'stong' 'basks' 'oracy' 'trone'
 'coved' 'agita' 'trust' 'kayak' 'giros' 'domed' 'yamen' 'delay' 'navvy'
 'carby' 'tizes' 'hoses' 'daffs' 'diver' 'notch' 'slink' 'namma' 'drown'
 'carks' 'pepsi' 'hoors' 'erect' 'benis' 'panty' 'cowps' 'amble' 'bitte'
 'braid' 'murrs' 'coomb' 'stylo' 'imine' 'gatch' 'claes' 'smirr' 'cotts'
 'fiats' 'paths' 'zoeae' 'cuing' 'dauts' 'boink' 'scops' 'staps' 'sanes'
 'saute' 'gauss' 'shily' 'icing' 'pleat' 'signa' 'araba' 'flirt' 'begot'
 'flier' 'ahent' 'mvule' 'kiack' 'glair' 'harim' 'sibyl' 'sudds' 'fines'
 'carps' 'atlas' 'cloye' 'seams' 'wawas' 'repin' 'vives' 'linch' 'unwit'
 'coapt' 'derth' 'deoxy' 'tases' 'pulmo' 'rhyne' 'unify' 'taish' 'gucks'
 'hovel' 'culms' 'ennog' 'oonts' 'honor' 'afald' 'plain' 'fusee' 'utter'
 'whins']


## Rewards

Тестирую разные способы задания награды.

In [26]:
# Reward schemes compared in this section; each dict is passed as `rewards=`
# to Environment. Presumably 'B'/'Y'/'G' are per-letter rewards for
# black/yellow/green feedback and 'win'/'lose'/'step' are outcome and
# per-step terms — TODO confirm against Environment.
# NOTE(review): later sections reference `lett_rewards` / `letters_rewards`,
# which are not defined anywhere in the visible notebook.

# Positive reward for every letter outcome plus a large win bonus.
soft_rewards = {'B':1, 'Y':2, 'G':3, 'win':20, 'lose':-10, 'step':-4}
# No per-letter reward at all: only win/lose and the step penalty.
hard_rewards = {'B':0, 'Y':0, 'G':0, 'win':10, 'lose':-10, 'step':-2}
# Same as char_rewards but with a heavier step penalty (-5 instead of -2).
step_rewards = {'B':0, 'Y':1, 'G':1, 'win':10, 'lose':-10, 'step':-5}
# Equal reward for 'Y' and 'G' letters, nothing for 'B'.
char_rewards = {'B':0, 'Y':1, 'G':1, 'win':10, 'lose':-10, 'step':-2}

In [27]:
def experiment(rewards, answers, guesses, n_episodes, logging_level=None):
  """Train an agent under the given reward scheme, then evaluate it.

  Builds an Environment around a Wordle game whose vocabulary is `guesses`
  and whose answer pool is `answers`, using a StateYesNo state. The Agent
  gets one action per guess word. `train` is called first, then `test`.

  Args:
    rewards: Reward dict handed to Environment (the parameter under study).
    answers: Words the game may pick as the hidden answer.
    guesses: Words the agent is allowed to play.
    n_episodes: Number of training episodes passed to `train`.
    logging_level: Forwarded unchanged to `train`.
  """
  game = Wordle(vocabulary=guesses, answers=answers)
  env = Environment(rewards=rewards, wordle=game, state_instance=StateYesNo())

  n_state = env.state.size
  n_actions = len(guesses)
  make_action = partial(ActionVocabulary, vocabulary=guesses)
  agent = Agent(
      state_size=n_state,
      action_size=n_actions,
      action_constructor=make_action,
  )

  train(env=env, agent=agent, n_episodes=n_episodes, logging_level=logging_level)
  test(env=env, agent=agent)

### Soft

In [28]:
# Soft rewards, 10 answers / 100 guesses, 5000 training episodes.
experiment(soft_rewards, answers_10_100, guesses_10_100, 5000)


Episode  500	Score: 12.88	Success Rate: 69.0%	Duration: 23.5 s

Episode 1000	Score: 15.33	Success Rate: 75.0%	Duration: 41.8 s

Episode 1500	Score: 19.52	Success Rate: 86.0%	Duration: 60.2 s

Episode 2000	Score: 23.09	Success Rate: 94.0%	Duration: 78.7 s

Episode 2500	Score: 21.20	Success Rate: 89.0%	Duration: 98.0 s

Episode 3000	Score: 22.01	Success Rate: 90.0%	Duration: 120.1 s

Episode 3500	Score: 22.53	Success Rate: 91.0%	Duration: 140.6 s

Episode 4000	Score: 24.83	Success Rate: 97.0%	Duration: 163.0 s

Episode 4500	Score: 24.18	Success Rate: 95.0%	Duration: 185.9 s

Episode 5000	Score: 26.14	Success Rate: 99.0%	Duration: 214.2 s
Success: 9 / 10 (90.0000%)	Steps: 4.8889	Time: 0.1 s	Saved to: test18-03-2023-08:54:38.txt



In [29]:
# Soft rewards, 100 answers / 100 guesses, 20000 training episodes.
experiment(soft_rewards, answers_100_100, guesses_100_100, 20000)


Episode 2000	Score: -1.92	Success Rate: 18.0%	Duration: 119.6 s

Episode 4000	Score: 3.61	Success Rate: 33.0%	Duration: 239.6 s

Episode 6000	Score: 9.42	Success Rate: 50.0%	Duration: 357.7 s

Episode 8000	Score: 10.80	Success Rate: 53.0%	Duration: 471.9 s

Episode 10000	Score: 13.80	Success Rate: 64.0%	Duration: 584.8 s

Episode 12000	Score: 15.84	Success Rate: 69.0%	Duration: 698.1 s

Episode 14000	Score: 18.10	Success Rate: 76.0%	Duration: 812.4 s

Episode 16000	Score: 17.27	Success Rate: 75.0%	Duration: 927.9 s

Episode 18000	Score: 17.45	Success Rate: 75.0%	Duration: 1045.6 s

Episode 20000	Score: 16.96	Success Rate: 73.0%	Duration: 1169.8 s
Success: 78 / 100 (78.0000%)	Steps: 4.2564	Time: 0.6 s	Saved to: test18-03-2023-09:14:08.txt



### Hard

In [30]:
# Hard rewards, 10 answers / 100 guesses, 5000 training episodes.
experiment(hard_rewards, answers_10_100, guesses_10_100, 5000)


Episode  500	Score: -6.78	Success Rate: 47.0%	Duration: 20.7 s

Episode 1000	Score: 2.46	Success Rate: 80.0%	Duration: 38.2 s

Episode 1500	Score: 7.16	Success Rate: 98.0%	Duration: 54.0 s

Episode 2000	Score: 7.96	Success Rate: 100.0%	Duration: 68.7 s

Episode 2500	Score: 7.98	Success Rate: 100.0%	Duration: 98.5 s

Episode 3000	Score: 7.46	Success Rate: 100.0%	Duration: 120.3 s

Episode 3500	Score: 5.24	Success Rate: 91.0%	Duration: 132.0 s

Episode 4000	Score: 8.28	Success Rate: 100.0%	Duration: 142.9 s

Episode 4500	Score: 5.28	Success Rate: 94.0%	Duration: 155.8 s

Episode 5000	Score: 5.98	Success Rate: 94.0%	Duration: 169.3 s
Success: 10 / 10 (100.0000%)	Steps: 1.9000	Time: 0.0 s	Saved to: test18-03-2023-09:16:59.txt



In [31]:
# Hard rewards, 100 answers / 100 guesses, 20000 training episodes.
# NOTE(review): the recorded output stops after episode 2000, so this run
# appears to have been interrupted — re-run before drawing conclusions.
experiment(hard_rewards, answers_100_100, guesses_100_100, 20000)


Episode 2000	Score: -15.00	Success Rate: 19.0%	Duration: 118.7 s


### Step

In [None]:
# Step rewards, 10 answers / 100 guesses, 20000 training episodes.
# NOTE(review): the soft/hard runs on this dataset used 5000 episodes.
experiment(step_rewards, answers_10_100, guesses_10_100, 20000)

In [None]:
# Step rewards, 100 answers / 100 guesses, 20000 training episodes.
experiment(step_rewards, answers_100_100, guesses_100_100, 20000)

### Char

In [None]:
# Char rewards, 10 answers / 100 guesses, 20000 training episodes.
# NOTE(review): the soft/hard runs on this dataset used 5000 episodes.
experiment(char_rewards, answers_10_100, guesses_10_100, 20000)

In [None]:
# Char rewards, 100 answers / 100 guesses, 20000 training episodes.
experiment(char_rewards, answers_100_100, guesses_100_100, 20000)

## States

Тестирую разные способы задания состояния

### StateYesNo

In [None]:
def experiment(answers, guesses, n_episodes, logging_level=None):
  """Train and evaluate an agent that observes the game through StateYesNo.

  Redefines the notebook-level `experiment`; the reward scheme is no longer
  a parameter but is read from the global `lett_rewards`.

  Args:
    answers: Words the game may pick as the hidden answer.
    guesses: Words the agent is allowed to play (the Wordle vocabulary).
    n_episodes: Number of training episodes passed to `train`.
    logging_level: Forwarded unchanged to `train`.
  """
  # NOTE(review): `lett_rewards` is not defined in the visible notebook;
  # calling this on a fresh kernel raises NameError until it is defined.
  reward_table = lett_rewards
  game = Wordle(vocabulary=guesses, answers=answers)
  env = Environment(rewards=reward_table, wordle=game, state_instance=StateYesNo())

  n_state = env.state.size
  n_actions = len(env.wordle.vocabulary)
  make_action = partial(ActionVocabulary, vocabulary=guesses)
  agent = Agent(
      state_size=n_state,
      action_size=n_actions,
      action_constructor=make_action,
  )

  train(env=env, agent=agent, n_episodes=n_episodes, logging_level=logging_level)
  test(env=env, agent=agent)

In [None]:
# 10 answers / 100 guesses, 5000 training episodes.
# Uses whichever 3-argument `experiment` was defined most recently.
experiment(answers_10_100, guesses_10_100, 5000)

In [None]:
# 100 answers / 100 guesses, 20000 training episodes.
experiment(answers_100_100, guesses_100_100, 20000)

In [None]:
# 50 answers / 200 guesses, 20000 training episodes.
experiment(answers_50_200, guesses_50_200, 20000)

In [None]:
# 200 answers / 200 guesses, 30000 training episodes.
experiment(answers_200_200, guesses_200_200, 30000)

In [None]:
# 100 answers / 2000 guesses, 50000 training episodes.
experiment(answers_100_2000, guesses_100_2000, 50000)

### StateVocabulary

In [None]:
def experiment(answers, guesses, n_episodes, logging_level=None):
  """Train and evaluate an agent that observes the game through StateVocabulary.

  Same pipeline as the StateYesNo variant, but the environment state is a
  StateVocabulary built from a boolean `answers_mask`.

  Args:
    answers: Words the game may pick as the hidden answer.
    guesses: Words the agent is allowed to play (the Wordle vocabulary).
    n_episodes: Number of training episodes passed to `train`.
    logging_level: Forwarded unchanged to `train`.
  """
  # NOTE(review): `lett_rewards` is not defined in the visible notebook.
  reward_table = lett_rewards
  game = Wordle(vocabulary=guesses, answers=answers)

  # One flag per answer: True when that answer also appears in `guesses`.
  # NOTE(review): if StateVocabulary expects one flag per vocabulary word
  # (marking which guesses are answers), the roles here are swapped — confirm.
  is_known = (word in guesses for word in answers)
  mask = np.fromiter(is_known, dtype=bool)

  env = Environment(
      rewards=reward_table,
      wordle=game,
      state_instance=StateVocabulary(answers_mask=mask),
  )

  n_state = env.state.size
  n_actions = len(env.wordle.vocabulary)
  make_action = partial(ActionVocabulary, vocabulary=guesses)
  agent = Agent(
      state_size=n_state,
      action_size=n_actions,
      action_constructor=make_action,
  )

  train(env=env, agent=agent, n_episodes=n_episodes, logging_level=logging_level)
  test(env=env, agent=agent)

## Replay Sampling

### Uniform

In [None]:
def experiment(answers, guesses, n_episodes, logging_level=None):
  """Train and evaluate with alpha = 0 (the "Uniform" replay-sampling setup).

  Args:
    answers: Words the game may pick as the hidden answer.
    guesses: Words the agent is allowed to play (the Wordle vocabulary).
    n_episodes: Number of training episodes passed to `train`.
    logging_level: Forwarded unchanged to `train`.
  """
  # NOTE(review): `lett_rewards` is not defined in the visible notebook.
  reward_table = lett_rewards
  game = Wordle(vocabulary=guesses, answers=answers)
  env = Environment(rewards=reward_table, wordle=game, state_instance=StateYesNo())

  n_state = env.state.size
  n_actions = len(env.wordle.vocabulary)
  make_action = partial(ActionVocabulary, vocabulary=guesses)
  # alpha stays at 0 with a decay rate of 1.
  sampling = {'alpha': 0, 'alpha_decay_rate': 1}
  agent = Agent(
    state_size=n_state,
    action_size=n_actions,
    action_constructor=make_action,
    priority_rate=sampling,
  )

  train(env=env, agent=agent, n_episodes=n_episodes, logging_level=logging_level)
  test(env=env, agent=agent)

### A Little Prioritized

In [None]:
def experiment(answers, guesses, n_episodes, logging_level=None):
  """Train and evaluate with alpha = 0.3 (the "A Little Prioritized" setup).

  Args:
    answers: Words the game may pick as the hidden answer.
    guesses: Words the agent is allowed to play (the Wordle vocabulary).
    n_episodes: Number of training episodes passed to `train`.
    logging_level: Forwarded unchanged to `train`.
  """
  # NOTE(review): `lett_rewards` is not defined in the visible notebook.
  reward_table = lett_rewards
  game = Wordle(vocabulary=guesses, answers=answers)
  env = Environment(rewards=reward_table, wordle=game, state_instance=StateYesNo())

  n_state = env.state.size
  n_actions = len(env.wordle.vocabulary)
  make_action = partial(ActionVocabulary, vocabulary=guesses)
  sampling = {'alpha': 0.3, 'alpha_decay_rate': 0.95}
  agent = Agent(
    state_size=n_state,
    action_size=n_actions,
    action_constructor=make_action,
    priority_rate=sampling,
  )

  train(env=env, agent=agent, n_episodes=n_episodes, logging_level=logging_level)
  test(env=env, agent=agent)

### Middle Prioritized

In [None]:
def experiment(answers, guesses, n_episodes, logging_level=None):
  """Train and evaluate with alpha = 0.6 (the "Middle Prioritized" setup).

  Args:
    answers: Words the game may pick as the hidden answer.
    guesses: Words the agent is allowed to play (the Wordle vocabulary).
    n_episodes: Number of training episodes passed to `train`.
    logging_level: Forwarded unchanged to `train`.
  """
  # NOTE(review): `lett_rewards` is not defined in the visible notebook.
  reward_table = lett_rewards
  game = Wordle(vocabulary=guesses, answers=answers)
  env = Environment(rewards=reward_table, wordle=game, state_instance=StateYesNo())

  n_state = env.state.size
  n_actions = len(env.wordle.vocabulary)
  make_action = partial(ActionVocabulary, vocabulary=guesses)
  sampling = {'alpha': 0.6, 'alpha_decay_rate': 0.95}
  agent = Agent(
    state_size=n_state,
    action_size=n_actions,
    action_constructor=make_action,
    priority_rate=sampling,
  )

  train(env=env, agent=agent, n_episodes=n_episodes, logging_level=logging_level)
  test(env=env, agent=agent)

### Much Prioritized

In [None]:
def experiment(answers, guesses, n_episodes, logging_level=None):
  """Train and evaluate with alpha = 0.9 (the "Much Prioritized" setup).

  Args:
    answers: Words the game may pick as the hidden answer.
    guesses: Words the agent is allowed to play (the Wordle vocabulary).
    n_episodes: Number of training episodes passed to `train`.
    logging_level: Forwarded unchanged to `train`.
  """
  # NOTE(review): `lett_rewards` is not defined in the visible notebook.
  reward_table = lett_rewards
  game = Wordle(vocabulary=guesses, answers=answers)
  env = Environment(rewards=reward_table, wordle=game, state_instance=StateYesNo())

  n_state = env.state.size
  n_actions = len(env.wordle.vocabulary)
  make_action = partial(ActionVocabulary, vocabulary=guesses)
  sampling = {'alpha': 0.9, 'alpha_decay_rate': 0.95}
  agent = Agent(
    state_size=n_state,
    action_size=n_actions,
    action_constructor=make_action,
    priority_rate=sampling,
  )

  train(env=env, agent=agent, n_episodes=n_episodes, logging_level=logging_level)
  test(env=env, agent=agent)

## Steps

Здесь используем оптимальные параметры из предыдущих экспериментов.

### 6 on train, 6 on test

In [None]:
def experiment(answers, guesses, n_episodes, logging_level=None):
  """Train and evaluate with the Wordle game's default guess limit.

  Baseline of the "Steps" section: `max_guesses` is not passed to Wordle,
  so both training and testing use whatever default the class provides
  (6 according to the section heading).

  Args:
    answers: Words the game may pick as the hidden answer.
    guesses: Words the agent is allowed to play (the Wordle vocabulary).
    n_episodes: Number of training episodes passed to `train`.
    logging_level: Forwarded unchanged to `train`.
  """
  # NOTE(review): `lett_rewards` is not defined in the visible notebook.
  reward_table = lett_rewards
  game = Wordle(vocabulary=guesses, answers=answers)
  env = Environment(rewards=reward_table, wordle=game, state_instance=StateYesNo())

  n_state = env.state.size
  n_actions = len(env.wordle.vocabulary)
  make_action = partial(ActionVocabulary, vocabulary=guesses)
  # TODO: placeholder values — replace with the best ones found earlier.
  sampling = {'alpha': 0.5, 'alpha_decay_rate': 0.5}
  agent = Agent(
    state_size=n_state,
    action_size=n_actions,
    action_constructor=make_action,
    priority_rate=sampling,
  )

  train(env=env, agent=agent, n_episodes=n_episodes, logging_level=logging_level)
  test(env=env, agent=agent)

### 10 on train, 6 on test

In [None]:
def experiment(answers, guesses, n_episodes, logging_level=None):
  """Train with up to 10 guesses per game, then evaluate with 6.

  Args:
    answers: Words the game may pick as the hidden answer.
    guesses: Words the agent is allowed to play (the Wordle vocabulary).
    n_episodes: Number of training episodes passed to `train`.
    logging_level: Forwarded unchanged to `train`.
  """
  # NOTE(review): `lett_rewards` is not defined in the visible notebook.
  reward_table = lett_rewards
  game = Wordle(vocabulary=guesses, answers=answers, max_guesses=10)
  env = Environment(rewards=reward_table, wordle=game, state_instance=StateYesNo())

  n_state = env.state.size
  n_actions = len(env.wordle.vocabulary)
  make_action = partial(ActionVocabulary, vocabulary=guesses)
  # TODO: placeholder values — replace with the best ones found earlier.
  sampling = {'alpha': 0.5, 'alpha_decay_rate': 0.5}
  agent = Agent(
    state_size=n_state,
    action_size=n_actions,
    action_constructor=make_action,
    priority_rate=sampling,
  )

  train(env=env, agent=agent, n_episodes=n_episodes, logging_level=logging_level)

  # Tighten the limit on the same game object before evaluating.
  env.wordle.max_guesses = 6
  test(env=env, agent=agent)

### 15 on train, 6 on test

In [None]:
def experiment(answers, guesses, n_episodes, logging_level=None):
  """Train with up to 15 guesses per game, then evaluate with 6.

  Args:
    answers: Words the game may pick as the hidden answer.
    guesses: Words the agent is allowed to play (the Wordle vocabulary).
    n_episodes: Number of training episodes passed to `train`.
    logging_level: Forwarded unchanged to `train`.
  """
  # NOTE(review): `lett_rewards` is not defined in the visible notebook.
  reward_table = lett_rewards
  game = Wordle(vocabulary=guesses, answers=answers, max_guesses=15)
  env = Environment(rewards=reward_table, wordle=game, state_instance=StateYesNo())

  n_state = env.state.size
  n_actions = len(env.wordle.vocabulary)
  make_action = partial(ActionVocabulary, vocabulary=guesses)
  # TODO: placeholder values — replace with the best ones found earlier.
  sampling = {'alpha': 0.5, 'alpha_decay_rate': 0.5}
  agent = Agent(
    state_size=n_state,
    action_size=n_actions,
    action_constructor=make_action,
    priority_rate=sampling,
  )

  train(env=env, agent=agent, n_episodes=n_episodes, logging_level=logging_level)

  # Tighten the limit on the same game object before evaluating.
  env.wordle.max_guesses = 6
  test(env=env, agent=agent)

### stages on steps number

Появилась идея: сначала обучить на 6, затем на 10, и наконец на inf+abortion.

In [None]:
def steps_stage_train(answers, guesses, episodes_counts):
  """Train one agent in three consecutive stages, testing after each.

  The same environment and agent are reused across stages, so each stage
  continues from the weights left by the previous one.

  Args:
    answers: Words the game may pick as the hidden answer.
    guesses: Words the agent is allowed to play (the Wordle vocabulary).
    episodes_counts: Mapping with keys '6', '10' and 'inf' giving the number
      of training episodes for each stage.
  """
  env = Environment(
    # NOTE(review): `lett_rewards` is not defined in the visible notebook.
    rewards=lett_rewards,
    wordle=Wordle(vocabulary=guesses, answers=answers, max_guesses=6),
    state_instance=StateYesNo()
  )

  agent = Agent(
    state_size=env.state.size,
    action_size=len(guesses),
    action_constructor=partial(ActionVocabulary, vocabulary=guesses),
    priority_rate={ # TODO: placeholder values — replace with the optimal ones
      'alpha': 0.5,
      'alpha_decay_rate': 0.5,
    }
  )

  # NOTE(review): max_guesses stays at 6 for every stage; the stage names
  # suggest it was meant to become 10 and then unlimited — confirm and set
  # env.wordle.max_guesses before the corresponding stage if so.

  # Stage '6': the longest run.
  train(env=env, agent=agent, n_episodes=episodes_counts['6'])
  test(env=env, agent=agent)

  # Stage '10': a shorter run.
  train(env=env, agent=agent, n_episodes=episodes_counts['10'])
  test(env=env, agent=agent)

  # Stage 'inf': the shortest run; the episode ends once the game is won.
  # NOTE(review): assumes `train` accepts an `end_of_episode` callback — verify.
  train(env=env, agent=agent, n_episodes=episodes_counts['inf'],
        end_of_episode=lambda env: env.wordle.win == True)
  test(env=env, agent=agent)

## Stages on answers list size

Фиксированный guesses размера 100. Меняющийся answers по порядку: 10, 40, 70, 100.

In [None]:
def answer_stage_train(
    guesses, start_size, end_size, n_stages, episodes_counts
):
  """Train one agent on a growing answer list, testing after every stage.

  The guess vocabulary is fixed; the answer list is the first `size` guesses,
  with `size` spaced evenly from `start_size` to `end_size` over `n_stages`
  stages (e.g. 10, 40, 70, 100 for start=10, end=100, n_stages=4). A new
  Environment is built per stage while the same agent keeps learning.

  Args:
    guesses: Fixed guess vocabulary; answers are taken from its prefix.
    start_size: Number of answers in the first stage.
    end_size: Number of answers in the last stage.
    n_stages: Number of stages; with 1 stage only `start_size` is used.
    episodes_counts: Sequence indexed by stage number giving the number of
      training episodes for that stage.
  """
  agent = Agent(
    # NOTE(review): the hard-coded 100 presumably matches len(guesses) == 100
    # and the StateVocabulary layout — confirm before using another size.
    state_size=StateYesNo().size + 100,
    action_size=len(guesses),
    action_constructor=partial(ActionVocabulary, vocabulary=guesses),
    priority_rate={ # TODO: placeholder values — replace with the optimal ones
      'alpha': 0.5,
      'alpha_decay_rate': 0.5,
    }
  )

  # Evenly spaced integer sizes. linspace avoids dividing by (n_stages - 1),
  # which is zero for a single stage, and rounding keeps the slice bound an
  # integer (a float bound raises TypeError).
  stage_sizes = np.linspace(start_size, end_size, n_stages).round().astype(int)

  for stage, size in enumerate(stage_sizes):
    answers = guesses[:int(size)]

    env = Environment(
      # Name aligned with `lett_rewards` used by the other cells.
      # NOTE(review): it is not defined in the visible notebook.
      rewards=lett_rewards,
      wordle=Wordle(
          vocabulary=guesses,
          answers=answers
      ),
      state_instance=StateVocabulary(
          # NOTE(review): one flag per answer, always True here because the
          # answers are a prefix of guesses — confirm the intended mask.
          answers_mask=np.fromiter((ans in guesses for ans in answers), dtype=bool)
      )
    )

    train(env=env, agent=agent, n_episodes=episodes_counts[stage])
    test(env=env, agent=agent)
    print('\n============================')
    print('\n============================')

Идея: сеть генерирует слово (не обязательно из словаря), а затем находит в словаре слово, наиболее похожее на сгенерированное, — по идее, это и есть embeddings.