# Affixes

Visualization, general structures, statistics, etc.

In [14]:
from utils import load_cache

def preview_affix_data(affix_data, top_n=15):
    """Print the ``top_n`` entries of ``affix_data`` with the largest values.

    Args:
        affix_data: Mapping whose values are numeric (they are negated to
            build the sort key).
        top_n: Maximum number of entries to print.

    Each entry is printed on its own line as the key left-aligned in a
    10-character column followed by its value. Entries with equal values
    keep the mapping's iteration order.
    """
    ranked = sorted(affix_data.items(), key=lambda pair: -pair[1])
    for affix, count in ranked[:top_n]:
        print(f"  {affix:<10} {count}")

# Load the cached affix table and print its 100 highest-valued entries.
affix_data = load_cache("affixes_train_cons.pkl")
preview_affix_data(affix_data, top_n=100)

  ed         31090
  ng         21398
  ati        21249
  es         17210
  tio        16296
  nes        16224
  ly         15704
  er         15640
  ter        15375
  ica        12687
  ic         12192
  on         11856
  ss         11622
  al         11494
  ent        10929
  all        10764
  tin        10596
  eri        10479
  le         10444
  te         10196
  us         9914
  ate        9582
  ver        9477
  ali        9393
  ing        8769
  lin        8646
  ion        8229
  an         8128
  sti        8046
  per        7947
  ell        7866
  oni        7836
  ist        7812
  rat        7755
  rin        7563
  ene        7545
  iti        7437
  ari        7404
  ine        7227
  ere        7206
  lat        7023
  ill        7008
  tic        6993
  atio       6974
  tra        6873
  ran        6834
  lit        6816
  era        6684
  ste        6678
  st         6522
  ili        6372
  men        6351
  the        6351
  abl        6256
  olo   

Minimum affix length

In [15]:
# Length of the shortest key in the affix table.
print(min(map(len, affix_data.keys())))

2


Structure of the affixes file

In [4]:
from pickle import load

# Read the pickle file directly to inspect its raw layout.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open("affixes_train_cons.pkl", "rb") as fh:
    affixes = load(fh)

# Just to confirm the structure: print the first five key/value pairs.
print("One sample from affixes:")
for position, (affix, value) in enumerate(affixes.items()):
    if position >= 5:
        break
    print(f"{affix}: {value}")

One sample from affixes:
th: 1268
thi: 2463
no: 510
in: 4010
int: 3180


# Analysis of Subpattern Gradient agent

In games where it fails to reveal the full answer, the SB agent still tends to recover at least 60% of the letters in about 60-70% of cases, for words of length between 5 and 10.

In [13]:
import pickle
from collections import defaultdict

def count_blanks(pattern):
    """Return how many items of ``pattern`` are equal to the blank marker ``"_"``."""
    blank_positions = [ch for ch in pattern if ch == "_"]
    return len(blank_positions)

# Load data: the loop below unpacks each sample as (pattern, guessed, true_word).
# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open("transformer_eval_data_from_subpattern_short.pkl", "rb") as fh:
    data = pickle.load(fh)

total_samples = len(data)
print(f"🔢 Total samples: {total_samples}")

# Validate each sample's reveal %: the share of the true word's positions
# that are not blank ("_") in the pattern.
reveal_stats = []
length_buckets = defaultdict(list)

for pattern, guessed, true_word in data:
    n_letters = len(true_word)
    n_revealed = n_letters - count_blanks(pattern)
    fraction_revealed = n_revealed / n_letters
    reveal_stats.append(fraction_revealed)
    # Group the same ratio by word length for the per-length summary.
    length_buckets[n_letters].append(fraction_revealed)

# Summary: number of samples with at least 60% of positions revealed.
above_60 = len([ratio for ratio in reveal_stats if ratio >= 0.6])
print(f"✅ Samples ≥ 60% revealed: {above_60} / {total_samples} ({above_60 / total_samples:.2%})")

# Optional: average by length, in ascending order of word length.
print("\n📊 Average reveal ratio by word length:")
for length, ratios in sorted(length_buckets.items()):
    bucket_size = len(ratios)
    avg = sum(ratios) / bucket_size
    print(f" - Length {length:2d}: {avg:.2%} over {bucket_size} samples")


🔢 Total samples: 1000
✅ Samples ≥ 60% revealed: 702 / 1000 (70.20%)

📊 Average reveal ratio by word length:
 - Length  6: 60.11% over 155 samples
 - Length  7: 68.81% over 180 samples
 - Length  8: 66.55% over 210 samples
 - Length  9: 69.27% over 239 samples
 - Length 10: 72.18% over 216 samples


In [16]:
# Print the first five samples, numbered from 1, one field per line.
for sample_no, (pattern, guessed, true_word) in enumerate(data[:5], start=1):
    print(f"Sample {sample_no}:")
    print(f" - Pattern:    {pattern}")
    print(f" - Guessed:    {guessed}")
    print(f" - True word:  {true_word}\n")


Sample 1:
 - Pattern:    moreo_er
 - Guessed:    aeimnort
 - True word:  moreover

Sample 2:
 - Pattern:    _a_ead_
 - Guessed:    acdehirt
 - True word:  makeado

Sample 3:
 - Pattern:    scaril_
 - Guessed:    acehilorst
 - True word:  scarily

Sample 4:
 - Pattern:    _atrolmen
 - Guessed:    aceilmnort
 - True word:  patrolmen

Sample 5:
 - Pattern:    __errar_
 - Guessed:    aeirt
 - True word:  sherrard



In [17]:
# Load the 25000-sample file and list its first 20 records.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open("transformer_data_from_subpattern_short25000.pkl", "rb") as fh:
    full_history = pickle.load(fh)

print(f"Total full-history games: {len(full_history)}")
print("First full-history sample:")
head_records = full_history[:20]
for record in head_records:
    print(record)

Total full-history games: 25000
First full-history sample:
('e_erting', 'aceginrst', 'everting')
('__l_ie', 'defilrt', 'walkie')
('o_t_ta_t', 'aeinot', 'outstart')
('neo_esha', 'aehilmnorstv', 'neodesha')
('__allic', 'aceilnrst', 'phallic')
('_appers', 'aehinoprstv', 'lappers')
('pi___er', 'adeioprtv', 'pincher')
('_ri_e__s', 'deinrsvz', 'writeups')
('l_s_inia', 'ahilmnrst', 'luscinia')
('_oori__', 'aeinor', 'boorish')
('tar_e_', 'acdeimort', 'tarbes')
('_e_ri_', 'aeinrt', 'bedrid')
('_aters', 'aeilmorst', 'haters')
('t__e_ton', 'aceinopst', 'tylerton')
('r__tee', 'aehiortw', 'runtee')
('_enti_at__', 'aceint', 'ventilator')
('_iver', 'einrtv', 'liver')
('_raman', 'acegimnrt', 'braman')
('turtle_e_', 'abeilnortu', 'turtlepeg')
('__a_le', 'aeilnrt', 'chawle')


In [19]:
# Load the evaluation file and list its first 20 records.
# NOTE(review): this rebinds `full_history`, which an earlier cell bound to the
# contents of "transformer_data_from_subpattern_short25000.pkl".
# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open("transformer_eval_data_from_subpattern_short.pkl", "rb") as fh:
    full_history = pickle.load(fh)

print(f"Total full-history games: {len(full_history)}")
print("First full-history sample:")
for record_index in range(len(full_history[:20])):
    print(full_history[record_index])


Total full-history games: 1000
First full-history sample:
('moreo_er', 'aeimnort', 'moreover')
('_a_ead_', 'acdehirt', 'makeado')
('scaril_', 'acehilorst', 'scarily')
('_atrolmen', 'aceilmnort', 'patrolmen')
('__errar_', 'aeirt', 'sherrard')
('_lan_il_', 'acdeilnrtu', 'slangily')
('uproot_', 'aehinoprtu', 'uproots')
('e_ocardiac', 'acdehilmnort', 'exocardiac')
('r__i_alia', 'acdeilnrt', 'robigalia')
('t_rannized', 'adeinorstuvz', 'tyrannized')
('ee_i__', 'eir', 'eetion')
('__i_alian', 'acdeilnrtz', 'phigalian')
('sn___ered', 'deinprstv', 'snookered')
('t__iad', 'acdeilnrt', 'thyiad')
('e__o_t', 'aeiorst', 'excoct')
('rea___stom', 'adeimorst', 'reaccustom')
('_live__le', 'eilnrv', 'olivepale')
('aard_o__', 'adehiort', 'aardwolf')
('__ori_', 'aeinor', 'florid')
('_res___it', 'aeilnorst', 'presubmit')


In [2]:
## Testing transformer_prediction is consistent with evaluate_prediction function

from submission_agent import agent
from agent import HangmanAgent
from utils import HangmanTransformerModelV2, HangmanTransformerModelV3

# Make sure the agent is in Transformer mode before evaluating.
if agent.mode != "Transformer":
    print(f"Current Agent mode: {agent.mode}")
    print("Switching to Transformer mode...")
    agent.mode = "Transformer"
    print(f"Switched Agent mode to: {agent.mode}")

# Ensure model is loaded and on GPU and run evaluation.
# Each branch picks the checkpoint matching the agent's configured model class,
# loads it, and then runs the agent's evaluation routine.
if agent.TransformerModel==HangmanTransformerModelV3:
    model_path = "checkpoints/v3_run/transformer_best.pt"
    agent.transformer_model = agent.load_transformer_model(
        HangmanTransformerModelV3,
        model_path,
        use_cuda=True,
    )
    # n_eval=10 is passed through to the evaluation routine.
    agent.evaluate_transformer_prediction(
        use_blank_mask_for_transformer=True,
        eval_data_path="transformer_eval_data_from_subpattern_short.pkl",
        log_path="logs/v3_eval/transformer_best.jsonl",
        n_eval = 10
    )
elif agent.TransformerModel==HangmanTransformerModelV2:
    print("Using HangmanTransformerModelV2")
    model_path = "checkpoints/v2_run/transformer_best.pt"
    agent.transformer_model = agent.load_transformer_model(
        HangmanTransformerModelV2,
        model_path,
        use_cuda=True
    )
    agent.evaluate_transformer_prediction(
        eval_data_path="transformer_eval_data_from_subpattern.pkl"
    )
else:
    # Without this branch an unrecognised model class skipped evaluation
    # entirely and then crashed with a NameError on `model_path` in the final
    # print. Fail with an explicit, descriptive error instead.
    raise ValueError(
        f"Unsupported transformer model class: {agent.TransformerModel!r}; "
        "expected HangmanTransformerModelV2 or HangmanTransformerModelV3."
    )

print(f"Evaluation completed for {model_path}.")

Model max sequence length set to: 11
Transformer model loaded from checkpoints/v3_run/transformer_best.pt
[✓] Loaded 1000 samples from transformer_eval_data_from_subpattern_short.pkl

[Sample 1]
Pattern: moreo_er
Guessed: aeimnort
True:    moreover
Predicted letter: g
Blank Acc: 0.00% (0/1)
Prediction based on transformer_prediction(): g

[Sample 2]
Pattern: _a_ead_
Guessed: acdehirt
True:    makeado
Predicted letter: l
Blank Acc: 0.00% (0/3)
Prediction based on transformer_prediction(): l

[Sample 3]
Pattern: scaril_
Guessed: acehilorst
True:    scarily
Predicted letter: y
Blank Acc: 100.00% (1/1)
Prediction based on transformer_prediction(): y

[Sample 4]
Pattern: _atrolmen
Guessed: aceilmnort
True:    patrolmen
Predicted letter: p
Blank Acc: 100.00% (1/1)
Prediction based on transformer_prediction(): p

[Sample 5]
Pattern: __errar_
Guessed: aeirt
True:    sherrard
Predicted letter: v
Blank Acc: 0.00% (0/3)
Prediction based on transformer_prediction(): v

[Sample 6]
Pattern: _lan_il_