In [None]:
import random

class HangmanGame:
    """Minimal in-memory hangman game used to evaluate guessing strategies.

    Call :meth:`start_game` to pick a random word from ``word_list``, then
    submit letters one at a time with :meth:`guess_letter` until it reports
    ``"Win"`` or ``"Lose"``.
    """

    def __init__(self, word_list):
        """Store the candidate words; no word is selected until ``start_game``.

        Args:
            word_list: Non-empty sequence of words the target is drawn from.
        """
        self.word_list = word_list
        self.target_word = None
        self.masked_word = None
        self.guessed_letters = []
        self.wrong_guesses = 0
        self.max_wrong_guesses = 6

    def select_word(self):
        """Pick a random target word (lower-cased) and reset ``masked_word``."""
        self.target_word = random.choice(self.word_list).lower()
        # Use the same space-separated layout that mask_word() produces so that
        # masked_word has one consistent format for the whole game.
        self.masked_word = ' '.join('_' * len(self.target_word))

    def mask_word(self):
        """Return the target with unguessed letters as '_', space-separated."""
        return ' '.join(
            letter if letter in self.guessed_letters else '_'
            for letter in self.target_word
        )

    def guess_letter(self, letter):
        """Apply one guess and report the outcome.

        Args:
            letter: The guessed letter; it is lower-cased before use.

        Returns:
            A ``(correct, message)`` tuple. ``message`` is one of
            ``"Letter already guessed"``, ``"Correct guess"``, ``"Win"``,
            ``"Incorrect guess"`` or ``"Lose"``. A repeated guess returns
            ``(False, "Letter already guessed")`` and does not count as a
            wrong guess.
        """
        letter = letter.lower()
        if letter in self.guessed_letters:
            return False, "Letter already guessed"

        self.guessed_letters.append(letter)

        if letter in self.target_word:
            self.masked_word = self.mask_word()
            if '_' not in self.masked_word:
                return True, "Win"
            return True, "Correct guess"

        self.wrong_guesses += 1
        if self.wrong_guesses >= self.max_wrong_guesses:
            return False, "Lose"
        return False, "Incorrect guess"

    def start_game(self):
        """Start a fresh game: new target word, no guesses, no mistakes."""
        self.select_word()
        self.guessed_letters = []
        self.wrong_guesses = 0

    def get_game_status(self):
        """Return a snapshot of the current game state as a dict."""
        return {
            "masked_word": self.masked_word,
            # Copy so callers cannot mutate the game's own list through the snapshot.
            "guessed_letters": list(self.guessed_letters),
            "wrong_guesses": self.wrong_guesses,
            "max_wrong_guesses": self.max_wrong_guesses
        }


In [None]:
import requests
import collections
import numpy as np
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, Bidirectional, LSTM, Dropout, Dense
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from copy import deepcopy
import random
from keras.callbacks import Callback
from tqdm.keras import TqdmCallback
from itertools import combinations, chain
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from copy import deepcopy
import numpy as np
from tqdm import tqdm
from itertools import product
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from tqdm import tqdm



In [None]:
def build_dictionary(dictionary_file_location):
    """Read a text file and return its lines as a list of strings.

    Args:
        dictionary_file_location: Path of the word-list file to read.

    Returns:
        One list entry per line of the file, without line terminators.
    """
    with open(dictionary_file_location, "r") as handle:
        contents = handle.read()
    return contents.splitlines()


In [None]:
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tqdm.auto import tqdm
import random

def encode_word(word):
    """Encode a (possibly masked) word as a list of integer token ids.

    'a'-'z' map to 1-26. The mask character "_" and any other character
    (upper case, digits, punctuation) map to 27, so every id stays inside the
    range the model's embedding layer accepts and out-of-alphabet characters
    are handled the same way as in ``HangmanAPI.encode_word``.

    Args:
        word: String to encode.

    Returns:
        List of ints, one per character of ``word``.
    """
    encoded_word = []
    for cha in word:
        if 'a' <= cha <= 'z':
            encoded_word.append(ord(cha) - ord('a') + 1)  # 1-26 for 'a'-'z'
        else:
            # "_" (mask) and anything outside a-z share the mask/unknown id.
            encoded_word.append(27)
    return encoded_word

def generate_masked_versions(word_encoded, unique_chars):
    """Build single-blank training samples from one encoded word.

    For every character id in ``unique_chars`` (the mask id 27 is skipped) and
    every position where that id occurs, a copy of the word is produced with
    only that one position replaced by 27.

    Args:
        word_encoded: Sequence of token ids for one word.
        unique_chars: Iterable of token ids to generate samples for.

    Returns:
        ``(masks, targets)``: the masked copies and, aligned with them, the
        hidden letter as a 0-25 class index (token id minus one).
    """
    mask_token = 27
    masks, targets = [], []
    for char in unique_chars:
        if char == mask_token:
            continue
        positions = [idx for idx, token in enumerate(word_encoded) if token == char]
        for idx in positions:
            variant = list(word_encoded)
            variant[idx] = mask_token
            masks.append(variant)
            targets.append(char - 1)
    return masks, targets

def get_xy_adjusted(full_dictionary, test_size=0.2, val_size=0.25):
    """Turn a word list into padded, one-hot training and validation arrays.

    The words are first split into a train/validation pool and a held-out test
    list. Every pool word is expanded into single-blank samples with
    ``generate_masked_versions``; the samples (not the words) are then split
    into training and validation sets.

    Args:
        full_dictionary: List of words.
        test_size: Fraction of words held out as ``dict_test``.
        val_size: Fraction of the generated samples used for validation.

    Returns:
        ``(train_x, val_x, train_y, val_y, dict_train_val, dict_test, max_length)``
        where ``max_length`` is the length of the longest encoded pool word.

    Raises:
        ValueError: If no training sample could be generated.
    """
    dict_train_val, dict_test = train_test_split(full_dictionary, test_size=test_size, random_state=42)
    X, y = [], []
    max_length = 0

    for word in tqdm(dict_train_val, desc="Processing words"):
        word_encoded = encode_word(word)
        unique_chars = set(word_encoded)
        unique_chars.discard(27)  # Remove the mask indicator from unique chars
        masks, targets = generate_masked_versions(word_encoded, unique_chars)
        if not masks:
            # Blank line or a word with no encodable letters: nothing to learn
            # from, and max() over an empty sequence would raise.
            continue
        X.extend(masks)
        y.extend(targets)
        # Every mask is a copy of word_encoded, so they all share its length.
        max_length = max(max_length, len(word_encoded))

    if not X:
        raise ValueError("No training samples could be generated from the dictionary")

    X_padded = pad_sequences(X, maxlen=max_length, padding='post', value=27)  # Use 27 for padding
    # Use the to_categorical imported in this cell rather than relying on a
    # `tf` name that is only imported by a later cell.
    y_categorical = to_categorical(y, num_classes=26)  # Categories for 'a'-'z'

    train_x, val_x, train_y, val_y = train_test_split(X_padded, y_categorical, test_size=val_size, random_state=42)

    return train_x, val_x, train_y, val_y, dict_train_val, dict_test, max_length





In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, TimeDistributed, BatchNormalization, Dropout, Dense
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, LearningRateScheduler
import tensorflow as tf


def train_model_with_progress(train_x, train_y, val_x, val_y, max_length, batch_size=512, epochs=5, initial_learning_rate=0.001, patience=1, factor=0.5, min_lr=1e-6, checkpoint_path="/content/drive/MyDrive/2_The_best_lstm_model.keras"):
    """Build, compile and train the stacked bidirectional-LSTM letter classifier.

    Args:
        train_x, train_y: Training inputs (padded token ids) and one-hot targets.
        val_x, val_y: Validation inputs and targets.
        max_length: Length of the padded input sequences.
        batch_size: Mini-batch size passed to ``model.fit``.
        epochs: Maximum number of training epochs.
        initial_learning_rate: Starting learning rate for Adam.
        patience: Epochs without ``val_loss`` improvement before the learning
            rate is reduced.
        factor: Multiplier applied to the learning rate on each reduction.
        min_lr: Lower bound for the learning rate.
        checkpoint_path: Where the model with the lowest ``val_loss`` is saved.
            Defaults to the path that was previously hard-coded.

    Returns:
        ``(model, history)``: the trained model and the ``fit`` history object.
    """
    model = Sequential([
        # 28 embedding rows cover token ids 0-27 (1-26 letters, 27 mask/padding).
        Embedding(28, 128, input_length=max_length),
        Bidirectional(LSTM(128, return_sequences=True)),
        BatchNormalization(),
        Bidirectional(LSTM(128, return_sequences=True)),
        BatchNormalization(),
        Bidirectional(LSTM(128, return_sequences=True)),
        BatchNormalization(),
        Bidirectional(LSTM(64)),
        Dropout(0.5),
        Dense(26, activation='softmax')
    ])

    optimizer = tf.keras.optimizers.Adam(learning_rate=initial_learning_rate,clipvalue=1.0)
    model.compile(optimizer=optimizer, loss="categorical_crossentropy", metrics=['accuracy', 'top_k_categorical_accuracy'])

    # Save the best model (lowest validation loss) seen so far.
    checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')

    # Reduce the learning rate when validation loss stops decreasing.
    reduce_lr_callback = ReduceLROnPlateau(monitor='val_loss', factor=factor, patience=patience, min_lr=min_lr, verbose=1)

    # Train the model with the provided dataset and progress display.
    history = model.fit(train_x, train_y, validation_data=(val_x, val_y), batch_size=batch_size, epochs=epochs, callbacks=[checkpoint_callback, reduce_lr_callback], verbose=1)

    return model, history






In [None]:
# Load the training word list; each line of the file becomes one entry.
dictionary_file_location = "/content/drive/MyDrive/words_250000_train.txt"
full_dictionary = build_dictionary(dictionary_file_location)

In [None]:
# Experiment configuration passed to get_xy_adjusted / train_model_with_progress.
test_size = 0.05  # fraction of dictionary words held out as dict_test
val_size = 0.05 / 0.95  # fraction of the generated samples used for validation
batch_size = 1000
epochs = 200  # upper bound on the number of training epochs

In [None]:



# Build the padded training/validation arrays and keep the word-level split
# (dict_train_val / dict_test) plus the padded sequence length for later cells.
train_x, val_x, train_y, val_y, dict_train_val, dict_test, max_length = get_xy_adjusted(
    full_dictionary=full_dictionary,
    test_size=test_size,
    val_size=val_size

)



Processing words:   0%|          | 0/215935 [00:00<?, ?it/s]

In [None]:
# Train the model; the best checkpoint (lowest val_loss) is written to disk by
# the ModelCheckpoint callback while training runs.
model, history = train_model_with_progress(train_x, train_y, val_x, val_y, max_length, batch_size=batch_size, epochs=epochs)





Epoch 1/200
Epoch 1: val_loss improved from inf to 1.41996, saving model to /content/drive/MyDrive/2_The_best_lstm_model.keras
Epoch 2/200
Epoch 2: val_loss improved from 1.41996 to 1.23216, saving model to /content/drive/MyDrive/2_The_best_lstm_model.keras
Epoch 3/200
Epoch 3: val_loss improved from 1.23216 to 1.12792, saving model to /content/drive/MyDrive/2_The_best_lstm_model.keras
Epoch 4/200
Epoch 4: val_loss improved from 1.12792 to 1.06387, saving model to /content/drive/MyDrive/2_The_best_lstm_model.keras
Epoch 5/200
Epoch 5: val_loss improved from 1.06387 to 1.01494, saving model to /content/drive/MyDrive/2_The_best_lstm_model.keras
Epoch 6/200
Epoch 6: val_loss improved from 1.01494 to 0.98097, saving model to /content/drive/MyDrive/2_The_best_lstm_model.keras
Epoch 7/200
Epoch 7: val_loss improved from 0.98097 to 0.95471, saving model to /content/drive/MyDrive/2_The_best_lstm_model.keras
Epoch 8/200
Epoch 8: val_loss improved from 0.95471 to 0.93946, saving model to /conten

KeyboardInterrupt: 

In [None]:
# Save the in-memory model.
# NOTE(review): the recorded training run above ended in KeyboardInterrupt, so
# that cell may not have rebound `model` — confirm which model this saves. The
# best checkpoint is written separately by the ModelCheckpoint callback.
model.save('/content/drive/MyDrive/lstm_model.keras')

In [None]:
from tqdm import tqdm
from tensorflow.keras.models import load_model
import re
import collections

In [None]:
# Reload the checkpoint that ModelCheckpoint saved during training (the model
# with the lowest validation loss).
model = load_model('/content/drive/MyDrive/2_The_best_lstm_model.keras')

In [None]:
# Re-load the word list from the same file used for training.
dictionary_file_location = "/content/drive/MyDrive/words_250000_train.txt"
full_dictionary = build_dictionary(dictionary_file_location)

In [None]:
# (letter, count) pairs over the train/validation words, most common first;
# algo_guess_prob falls back to these when no dictionary word matches.
full_dictionary_common_letter_sorted = collections.Counter("".join(dict_train_val)).most_common()
# Candidate words for pattern matching: the train/validation words only, so the
# held-out test words are not in the lookup dictionary.
current_dictionary = dict_train_val

In [None]:
def build_dictionary(dictionary_file_location):
    """Read a text file and return its lines as a list of strings.

    This repeats the loader already defined earlier in the notebook.

    Args:
        dictionary_file_location: Path of the word-list file to read.

    Returns:
        One list entry per line of the file, without line terminators.
    """
    with open(dictionary_file_location, "r") as handle:
        contents = handle.read()
    return contents.splitlines()



In [None]:
from collections import Counter, defaultdict
import re

def refine_possible_words(current_dictionary, pattern):
    """Return the dictionary words that match ``pattern`` in full.

    Args:
        current_dictionary: Iterable of candidate words.
        pattern: Regular expression describing the known letters, e.g. ``"c.t"``.

    Returns:
        List of words, in dictionary order, whose whole text matches the pattern.
    """
    regex_pattern = re.compile(pattern)
    # fullmatch (not match): the pattern fixes the word length, so a longer
    # word that merely starts with the pattern must not be accepted.
    return [word for word in current_dictionary if regex_pattern.fullmatch(word)]

def calculate_letter_frequencies(possible_words, guessed_letters):
    """Count letter occurrences in the possible words, skipping guessed letters.

    Previously every word containing any guessed letter was dropped, which
    removed all candidates as soon as one letter had been revealed. Now every
    candidate word contributes and only the guessed letters are left out.

    Args:
        possible_words: Iterable of candidate words.
        guessed_letters: Letters that have already been guessed.

    Returns:
        ``collections.Counter`` mapping each unguessed letter to its number of
        occurrences across ``possible_words``.
    """
    guessed = set(guessed_letters)
    return Counter(ch for word in possible_words for ch in word if ch not in guessed)

def adjust_probabilities_based_on_feedback(letter_freq, feedback):
    """Hook for re-weighting letter frequencies using game feedback.

    No adjustment is implemented yet: ``feedback`` is ignored and the very same
    ``letter_freq`` object is handed back.
    """
    adjusted = letter_freq
    return adjusted

def algo_guess_prob(current_dictionary, word, guessed_letters, full_dictionary_common_letter_sorted):
    """Estimate letter probabilities from the dictionary words matching ``word``.

    Args:
        current_dictionary: Candidate words to match against.
        word: Current masked word; "_" marks a blank and spaces are ignored.
        guessed_letters: Letters that have already been guessed.
        full_dictionary_common_letter_sorted: ``(letter, count)`` pairs used as
            a fallback when the matching words yield no usable counts.

    Returns:
        Dict mapping each unguessed letter to a probability; the values sum to
        1. Empty if no unguessed letter has a positive count.
    """
    guessed = set(guessed_letters)

    # A blank can only hide a letter that has not been guessed yet, so blanks
    # exclude every guessed letter instead of matching any character.
    if guessed:
        blank = "[^" + "".join(re.escape(ch) for ch in sorted(guessed)) + "]"
    else:
        blank = "."
    # Revealed letters are escaped so they are always matched literally.
    pattern = "".join(
        blank if ch == "_" else re.escape(ch)
        for ch in word.replace(" ", "")
    )
    possible_words = refine_possible_words(current_dictionary, pattern)

    letter_freq = calculate_letter_frequencies(possible_words, guessed_letters) if possible_words else {}
    if not letter_freq:
        # Fall back to overall letter frequency when there is no matching word
        # or the matching words contribute no unguessed letters.
        letter_freq = {letter: freq for letter, freq in full_dictionary_common_letter_sorted if letter not in guessed}

    # Placeholder hook for feedback-based adjustment (currently a no-op).
    letter_freq_adjusted = adjust_probabilities_based_on_feedback(letter_freq, None)

    # Normalise over unguessed letters only so the result is a distribution.
    candidates = {letter: count for letter, count in letter_freq_adjusted.items() if letter not in guessed}
    total = sum(candidates.values())
    if total <= 0:
        return {}
    return {letter: count / total for letter, count in candidates.items()}


In [None]:
def normalize_counter(counter, guessed_letters):
    """Convert letter counts into probabilities over the unguessed letters.

    Guessed letters are removed before normalising, so the returned values sum
    to 1 (previously the total still included the guessed letters' counts).

    Args:
        counter: Mapping of letter to count or weight.
        guessed_letters: Letters to exclude.

    Returns:
        Dict of letter to probability; empty when nothing with a positive
        total remains, instead of dividing by zero.
    """
    guessed = set(guessed_letters)
    remaining = {letter: count for letter, count in counter.items() if letter not in guessed}
    total = sum(remaining.values())
    if total <= 0:
        return {}
    return {letter: count / total for letter, count in remaining.items()}

def combine_predictions(model_probs, algo_probs, guessed_letters):
    """Pick the best next letter from the model and dictionary estimates.

    The two scores are added per letter, the same way
    ``HangmanAPI.combine_predictions`` combines them; previously ``algo_probs``
    was accepted but never used.

    Args:
        model_probs: Sequence of 26 model scores, index 0 = 'a'.
        algo_probs: Dict of letter to dictionary-based probability.
        guessed_letters: Letters that must not be proposed again.

    Returns:
        The unguessed letter with the highest combined score.

    Raises:
        ValueError: If every candidate letter has already been guessed.
    """
    model_probs = model_prediction_to_dict(model_probs)

    # Parenthesised on purpose: "-" binds tighter than "|", so without the
    # parentheses guessed letters were only removed from the algo letters.
    letters = (set(model_probs) | set(algo_probs)) - set(guessed_letters)
    if not letters:
        raise ValueError("No unguessed letters left to choose from")

    # Sorted iteration makes ties resolve deterministically (alphabetically).
    combined_probs = {
        letter: model_probs.get(letter, 0) + algo_probs.get(letter, 0)
        for letter in sorted(letters)
    }

    # The arg-max is unaffected by normalisation, so the scores are compared
    # directly; this also avoids dividing by a zero total.
    return max(combined_probs, key=combined_probs.get)

In [None]:
def model_prediction_to_dict(model_prediction):
    """Map a sequence of scores to letters: index 0 -> 'a', 1 -> 'b', and so on.

    Args:
        model_prediction: Iterable of per-letter scores.

    Returns:
        Dict keyed by single characters holding the corresponding scores.
    """
    base = ord('a')
    letter_probs = {}
    for index, prob in enumerate(model_prediction):
        letter_probs[chr(base + index)] = prob
    return letter_probs

In [None]:
def hybrid_guess(model, word, guessed_letters, max_length, current_dictionary, full_dictionary_common_letter_sorted):
    """Propose the next letter using both the model and the dictionary.

    Args:
        model: Trained model exposing ``predict``.
        word: Current masked word ("_" for blanks; spaces are ignored).
        guessed_letters: Letters already guessed in this game.
        max_length: Padded sequence length the model was trained with.
        current_dictionary: Candidate words for pattern matching.
        full_dictionary_common_letter_sorted: ``(letter, count)`` fallback pairs.

    Returns:
        The letter chosen by ``combine_predictions``.
    """
    # Model prediction for the current board.
    clean_word = word.replace(" ", "")
    encoded_word = encode_word(clean_word)
    # Pad with 27, the value get_xy_adjusted used for the training data; the
    # default pad value 0 is a token the model never saw during training.
    padded_word = pad_sequences([encoded_word], maxlen=max_length, padding='post', value=27)
    model_prediction = model.predict(padded_word, verbose=0)[0]

    # Never let the model re-propose a letter that was already guessed.
    for i, letter in enumerate('abcdefghijklmnopqrstuvwxyz'):
        if letter in guessed_letters:
            model_prediction[i] = 0

    # Dictionary-based letter probabilities.
    algo_probs = algo_guess_prob(current_dictionary, word, guessed_letters, full_dictionary_common_letter_sorted)
    algo_probs_normalized = normalize_counter(algo_probs, guessed_letters)

    # Combine both sources; the per-guess debug print was removed because it
    # wrote one line per guess into the cell output.
    return combine_predictions(model_prediction, algo_probs_normalized, guessed_letters)

In [None]:
def play_hangman(model, word_list, max_length, current_dictionary, full_dictionary_common_letter_sorted):
    """Play one game per word in ``word_list`` and report the win rate.

    Args:
        model: Trained model passed through to ``hybrid_guess``.
        word_list: Words to play; each one is the target of exactly one game.
        max_length: Padded sequence length the model expects.
        current_dictionary: Candidate words for the dictionary-based guesser.
        full_dictionary_common_letter_sorted: ``(letter, count)`` pairs, most
            common first; also used as the fallback guess order.

    Returns:
        Win rate as a percentage (0.0 for an empty ``word_list``).
    """
    game_wins = 0
    game_losses = 0
    n = len(word_list)

    with tqdm(total=n) as pbar:
        for word in word_list:
            # Play this word: a one-element list forces the game's random
            # choice to pick it. Passing the whole list picked a random word
            # each time, so the loop variable was never actually used.
            game = HangmanGame([word])
            game.start_game()

            while True:
                masked_word = game.mask_word()

                # Use the hybrid (model + dictionary) strategy for the guess.
                guess_letter = hybrid_guess(model, masked_word.replace(' ', ''), game.guessed_letters, max_length, current_dictionary, full_dictionary_common_letter_sorted)

                _, message = game.guess_letter(guess_letter)

                if message == "Letter already guessed":
                    # A repeated guess costs no try, so re-asking the strategy
                    # with unchanged state could loop forever. Fall back to the
                    # most common letter that has not been tried yet.
                    fallback = next(
                        (letter for letter, _count in full_dictionary_common_letter_sorted
                         if letter not in game.guessed_letters),
                        None,
                    )
                    if fallback is None:
                        game_losses += 1
                        break
                    _, message = game.guess_letter(fallback)

                if message in ["Win", "Lose"]:
                    if message == "Win":
                        game_wins += 1
                    else:
                        game_losses += 1
                    break

            pbar.update(1)  # advance the progress bar

    print(f"Played {len(word_list)} games: {game_wins} wins, {game_losses} losses")
    win_rate = (game_wins / n) * 100 if n else 0.0
    print(f"Win Rate: {win_rate:.2f}%")
    return win_rate

In [None]:
# Evaluate the hybrid strategy on the first 100 held-out test words.
win_rate = play_hangman(model, dict_test[:100], max_length, current_dictionary, full_dictionary_common_letter_sorted)





  0%|          | 0/100 [00:00<?, ?it/s]

o
n
a
p
l
t


  1%|          | 1/100 [00:02<03:20,  2.03s/it]

b
h
b
m
o
n
l
r
a


  2%|▏         | 2/100 [00:04<03:24,  2.09s/it]

k
h
b
m
n
a
i
t
c
s
e


  3%|▎         | 3/100 [00:06<03:17,  2.04s/it]

l
h
o
b
n
u
i
a


  4%|▍         | 4/100 [00:08<03:29,  2.18s/it]

p
h
o
y
n
u
a
l
b


  5%|▌         | 5/100 [00:10<03:15,  2.06s/it]

p
h
o
n
u
i
p
a


  6%|▌         | 6/100 [00:12<03:19,  2.12s/it]

c
h
o
n
u
r
p
e
t
b
s


  7%|▋         | 7/100 [00:14<03:09,  2.04s/it]

d
h
b
m
v
w


  8%|▊         | 8/100 [00:15<02:44,  1.79s/it]

r
h
b
m
v
w


  9%|▉         | 9/100 [00:17<02:28,  1.63s/it]

r
h
o
n
r
a
u
c
s
w


 10%|█         | 10/100 [00:19<02:42,  1.81s/it]

t
h
b
m
w
o
n
c
r
a


 11%|█         | 11/100 [00:20<02:35,  1.75s/it]

p
h
o
n
b
m
a
i


 12%|█▏        | 12/100 [00:23<02:53,  1.97s/it]

c
h
o
b
t
i
c
s
g
p
l


 13%|█▎        | 13/100 [00:25<02:58,  2.05s/it]

f
h
o
u
a
b
r
e
i
t
n


 14%|█▍        | 14/100 [00:27<02:51,  1.99s/it]

l
h
b
m
v
w
r
n
o
a


 15%|█▌        | 15/100 [00:29<02:44,  1.93s/it]

u
h
b
m
o
u
r
i
s
a
p
e
n


 16%|█▌        | 16/100 [00:31<02:52,  2.05s/it]

l
h
o
n
u
a
p
b
r


 17%|█▋        | 17/100 [00:33<02:57,  2.14s/it]

l
h
b
o
w
r
u
a
e
t
p
d
j
s


 18%|█▊        | 18/100 [00:36<03:00,  2.20s/it]

n
h
v
g
b
n
f
o
t
e
c


 19%|█▉        | 19/100 [00:37<02:46,  2.05s/it]

y
o
n
a
p
l
t


 20%|██        | 20/100 [00:39<02:43,  2.05s/it]

b
h
o
b
n
m


 21%|██        | 21/100 [00:41<02:39,  2.01s/it]

w
h
o
e
r
i
n
p
m
t


 22%|██▏       | 22/100 [00:43<02:27,  1.89s/it]

s
h
o
b
n
m
a


 23%|██▎       | 23/100 [00:46<02:44,  2.13s/it]

t
h
b
m
n
o
d
r
y
a
e
l
t
s
c
p


 24%|██▍       | 24/100 [00:48<02:47,  2.21s/it]

i
h
o
n
u
f
t
p


 25%|██▌       | 25/100 [00:50<02:39,  2.12s/it]

k
h
b
m
v
w


 26%|██▌       | 26/100 [00:51<02:18,  1.87s/it]

r
h
o
b
a
u
r
n
d
y
i
c
l


 27%|██▋       | 27/100 [00:53<02:19,  1.91s/it]

k
h
b
m
n
u
p
c
o
a
r
s
i


 28%|██▊       | 28/100 [00:56<02:32,  2.11s/it]

t
h
b
m
n
a
i
t
c
s
e


 29%|██▉       | 29/100 [00:58<02:29,  2.11s/it]

l
h
o
s
p
a
u
r
i
e
t


 30%|███       | 30/100 [01:00<02:28,  2.12s/it]

n
h
o
n
d
t
b
r


 31%|███       | 31/100 [01:02<02:16,  1.98s/it]

a
h
o
y
n
u
a
l
b


 32%|███▏      | 32/100 [01:04<02:10,  1.92s/it]

p
h
o
n
b
m


 33%|███▎      | 33/100 [01:06<02:21,  2.11s/it]

f
h
o
e
r
t
i
y
u
a
n
k


 34%|███▍      | 34/100 [01:08<02:15,  2.05s/it]

c
h
b
m
o
n
l
r
a


 35%|███▌      | 35/100 [01:10<02:14,  2.08s/it]

k
h
b
o
w
r
u
a
e
t
p
d
j
s


 36%|███▌      | 36/100 [01:12<02:16,  2.14s/it]

n
h
b
m
o
n
u
i
c
t


 37%|███▋      | 37/100 [01:15<02:18,  2.20s/it]

k
o
n
a
p
l
t


 38%|███▊      | 38/100 [01:17<02:12,  2.14s/it]

b
h
v
g
b
n
f
o
t
e
c


 39%|███▉      | 39/100 [01:18<02:01,  1.99s/it]

y
h
c
t
o
p
s
e
r
i
a
b
f


 40%|████      | 40/100 [01:20<01:58,  1.98s/it]

g
h
o
p
t
c
l
m


 41%|████      | 41/100 [01:22<01:46,  1.80s/it]

b
h
r
d
o
b
i
e
p
u


 42%|████▏     | 42/100 [01:23<01:42,  1.76s/it]

l
h
o
n
u
i
p
s
c
m
a


 43%|████▎     | 43/100 [01:26<01:52,  1.98s/it]

t
h
o
s
d
c
w
y
b
i
a
p
l


 44%|████▍     | 44/100 [01:28<01:53,  2.02s/it]

r
h
o
n
u
a
i
c


 45%|████▌     | 45/100 [01:30<01:55,  2.09s/it]

p
h
b
m
v
w


 46%|████▌     | 46/100 [01:32<01:40,  1.86s/it]

r
h
b
m
o
n


 47%|████▋     | 47/100 [01:34<01:39,  1.87s/it]

f
h
b
m
n
o
d
r
y
a
e
l
t
s
c
p


 48%|████▊     | 48/100 [01:36<01:47,  2.07s/it]

i
h
o
n
u
r
p
e
t
b
s


 49%|████▉     | 49/100 [01:38<01:42,  2.02s/it]

d
h
o
n
u
f
t
p


 50%|█████     | 50/100 [01:40<01:40,  2.01s/it]

k
h
o
n
u
i
p
t
l
a


 51%|█████     | 51/100 [01:42<01:42,  2.10s/it]

z
o
a
b
t
i


 52%|█████▏    | 52/100 [01:45<01:47,  2.24s/it]

n
h
o
b
n
m
a


 53%|█████▎    | 53/100 [01:48<01:52,  2.40s/it]

p
h
b
a
o
u
r
i
s
n
t


 54%|█████▍    | 54/100 [01:49<01:42,  2.24s/it]

k
h
o
n
b
m


 55%|█████▌    | 55/100 [01:52<01:46,  2.37s/it]

f
h
v
o
r
a
u
t
i
d
f


 56%|█████▌    | 56/100 [01:54<01:37,  2.21s/it]

e
h
o
n
t
u
l
p
b


 57%|█████▋    | 57/100 [01:56<01:34,  2.19s/it]

r
h
o
b
n
m
a


 58%|█████▊    | 58/100 [01:59<01:38,  2.35s/it]

g
h
b
m
n
o
u


 59%|█████▉    | 59/100 [02:00<01:27,  2.12s/it]

c
h
b
m
v
w


 60%|██████    | 60/100 [02:02<01:17,  1.95s/it]

f
h
b
m
n
o
p
g


 61%|██████    | 61/100 [02:04<01:13,  1.89s/it]

c
h
o
b
w
f
r
t
i
c
s
e
p
l


 62%|██████▏   | 62/100 [02:06<01:15,  1.98s/it]

m
o
n
a
b
r
c
i


 63%|██████▎   | 63/100 [02:08<01:16,  2.07s/it]

t
h
c
t
o
p
s
e
r
i
a
b
f


 64%|██████▍   | 64/100 [02:10<01:12,  2.03s/it]

g
h
b
m
n
o
u


 65%|██████▌   | 65/100 [02:12<01:06,  1.89s/it]

c
h
o
b
n
u
i
a


 66%|██████▌   | 66/100 [02:14<01:10,  2.06s/it]

p
h
o
u
a
b
r
e
i
t
n


 67%|██████▋   | 67/100 [02:16<01:06,  2.02s/it]

l
h
o
e
r
t
i
y
u
a
n
k


 68%|██████▊   | 68/100 [02:18<01:03,  1.99s/it]

c
h
o
b
n
u
a


 69%|██████▉   | 69/100 [02:20<01:01,  1.97s/it]

c
h
o
b
t
i
c
s
g
p
l


 70%|███████   | 70/100 [02:22<01:01,  2.04s/it]

f
h
o
n
b
m
a
i


 71%|███████   | 71/100 [02:24<01:00,  2.09s/it]

c
h
b
m
n
o
p
g


 72%|███████▏  | 72/100 [02:26<00:55,  1.98s/it]

c
h
o
c
r
y
a
b
e
i
l


 73%|███████▎  | 73/100 [02:28<00:55,  2.06s/it]

z
h
o
n
b
a
e
l


 74%|███████▍  | 74/100 [02:31<00:56,  2.19s/it]

r
h
b
m
v
w


 75%|███████▌  | 75/100 [02:32<00:49,  1.98s/it]

f
h
o
b
n
u
r
a


 76%|███████▌  | 76/100 [02:35<00:50,  2.12s/it]

s
h
b
m
v
w
r
n
o
a


 77%|███████▋  | 77/100 [02:37<00:46,  2.04s/it]

u
h
o
n
b
m
a
i


 78%|███████▊  | 78/100 [02:39<00:47,  2.15s/it]

c
h
b
m
v
q
k


 79%|███████▉  | 79/100 [02:40<00:40,  1.91s/it]

r
h
b
m
o
s
i
v
r
u
x
n


 80%|████████  | 80/100 [02:43<00:41,  2.08s/it]

p
h
o
n
d
t
b
r


 81%|████████  | 81/100 [02:44<00:37,  1.95s/it]

a
h
r
o
p
b
t
s
c
a
l


 82%|████████▏ | 82/100 [02:46<00:34,  1.92s/it]

e
h
b
m
v
q
k


 83%|████████▎ | 83/100 [02:48<00:29,  1.73s/it]

r
h
r
d
o
b
i
e
p
u


 84%|████████▍ | 84/100 [02:49<00:27,  1.72s/it]

l
h
o
n
u
a
b
r


 85%|████████▌ | 85/100 [02:51<00:27,  1.82s/it]

p
h
o
p
t
c
l
m


 86%|████████▌ | 86/100 [02:53<00:23,  1.69s/it]

b
h
b
o
a
r
u
i
n
f


 87%|████████▋ | 87/100 [02:55<00:22,  1.76s/it]

l
h
o
s
p
a
u
r
i
e
t


 88%|████████▊ | 88/100 [02:57<00:22,  1.88s/it]

n
h
o
u
a
b
r
e
i
t
n


 89%|████████▉ | 89/100 [02:59<00:20,  1.88s/it]

l
h
d
o
f
a
b
p
m
i
r
e


 90%|█████████ | 90/100 [03:01<00:19,  1.93s/it]

c
h
b
m
w
o
n
c
r
a


 91%|█████████ | 91/100 [03:02<00:16,  1.82s/it]

p
h
o
s
n
g
d
b
c


 92%|█████████▏| 92/100 [03:04<00:14,  1.81s/it]

w
h
o
b
n
u
a


 93%|█████████▎| 93/100 [03:06<00:12,  1.83s/it]

c
h
b
m
n
o
d
r
y
a
e
l
t
s
c
p


 94%|█████████▍| 94/100 [03:08<00:12,  2.01s/it]

i
h
b
o
w
k
t
a
i
m
n
c


 95%|█████████▌| 95/100 [03:10<00:10,  2.02s/it]

e
h
b
m
n
o
l
w
f


 96%|█████████▌| 96/100 [03:12<00:07,  1.93s/it]

u
h
v
o
r
a
u
t
i
d
f


 97%|█████████▋| 97/100 [03:14<00:05,  1.89s/it]

e
h
o
n
u
r
p
e
t
b
s


 98%|█████████▊| 98/100 [03:16<00:03,  1.88s/it]

d
h
o
n
u
b
a
p
s
e


 99%|█████████▉| 99/100 [03:18<00:01,  2.00s/it]

k
h
b
m
n
o
u


100%|██████████| 100/100 [03:20<00:00,  2.00s/it]

c
Played 100 games: 16 wins, 84 losses
Win Rate: 16.00%





In [None]:
# Token ids used by HangmanAPI.encode_word: 'a'-'z' -> 1-26, '_' (blank) -> 27.
char_to_index = {chr(ord('a') + offset): offset + 1 for offset in range(26)}
char_to_index['_'] = 27


In [None]:
import collections
import json
import re
import time
from collections import Counter, defaultdict
from urllib.parse import parse_qs

import numpy as np
import requests
from keras.preprocessing.sequence import pad_sequences
from tqdm import tqdm


class HangmanAPI(object):
    """Client that plays hangman against the remote server.

    Letters are chosen by adding the trained model's per-letter scores to
    dictionary-based letter probabilities (see :meth:`guess`).
    """

    def __init__(self, model, max_length, char_to_index, access_token=None, session=None, timeout=None):
        """Set up the client and load the word list.

        Args:
            model: Trained model exposing ``predict``.
            max_length: Padded sequence length the model expects.
            char_to_index: Mapping of character to token id used for encoding.
            access_token: Token added to every request when set.
            session: Optional ``requests.Session``; a new one is created if omitted.
            timeout: Timeout passed to each HTTP request.
        """
        self.model = model
        self.max_length = max_length
        self.hangman_url = self.determine_hangman_url()
        self.access_token = access_token
        self.session = session or requests.Session()
        self.timeout = timeout
        self.guessed_letters = []
        self.char_to_index = char_to_index

        full_dictionary_location = "/content/drive/MyDrive/words_250000_train.txt"
        self.full_dictionary = self.build_dictionary(full_dictionary_location)
        self.full_dictionary_common_letter_sorted = collections.Counter("".join(self.full_dictionary)).most_common()
        self.current_dictionary = self.full_dictionary.copy()

    @staticmethod
    def determine_hangman_url():
        """Return the hangman endpoint on whichever host responded fastest."""
        links = ['https://trexsim.com', 'https://sg.trexsim.com']

        data = {link: 0 for link in links}

        for link in links:

            # First request to each host is not timed.
            requests.get(link)

            for i in range(10):
                s = time.time()
                requests.get(link)
                # Accumulate over the 10 probes; plain assignment kept only the
                # last probe's latency.
                data[link] += time.time() - s

        link = sorted(data.items(), key=lambda x: x[1])[0][0]
        link += '/trexsim/hangman'
        return link

    def encode_word(self, word):
        """Encode ``word`` as an int8 array of token ids; unknown characters map to 27."""
        return np.array([self.char_to_index.get(char, 27) for char in word], dtype=np.int8)

    def algo_guess_prob(self, clean_word, guessed_letters):
        """Estimate letter probabilities from dictionary words matching the board.

        Args:
            clean_word: Masked word without spaces; "_" marks a blank.
            guessed_letters: Letters already guessed in this game.

        Returns:
            Dict of unguessed letter to probability (values sum to 1), or an
            empty dict when no unguessed letter has a positive count.
        """
        guessed = set(guessed_letters)
        # A blank can only hide a letter that has not been guessed yet. This
        # rules out words containing a wrong guess while keeping words that
        # contain the revealed letters; the previous filter rejected every
        # word containing any guessed letter, correct ones included.
        if guessed:
            blank = "[^" + "".join(re.escape(ch) for ch in sorted(guessed)) + "]"
        else:
            blank = "."
        pattern = re.compile("".join(blank if ch == "_" else re.escape(ch) for ch in clean_word))
        # fullmatch: a candidate must have exactly the board's length.
        possible_words = [word for word in self.current_dictionary if pattern.fullmatch(word)]

        letter_freq = Counter(ch for word in possible_words for ch in word if ch not in guessed)
        if not letter_freq:
            # No usable candidates: fall back to overall letter frequency.
            letter_freq = {
                letter: freq
                for letter, freq in self.full_dictionary_common_letter_sorted
                if letter not in guessed
            }

        total = sum(letter_freq.values())
        if total <= 0:
            return {}
        return {letter: freq / total for letter, freq in letter_freq.items()}

    def combine_predictions(self, model_probs, algo_probs, guessed_letters):
        """Add model and dictionary scores per letter and return the best letter.

        Args:
            model_probs: Sequence of model scores, index 0 = 'a'.
            algo_probs: Dict of letter to dictionary-based probability.
            guessed_letters: Letters that must not be proposed again.
        """
        combined_probs = defaultdict(float)
        for i, prob in enumerate(model_probs):
            letter = chr(i + ord('a'))
            if letter not in guessed_letters:
                combined_probs[letter] += prob
        for letter, prob in algo_probs.items():
            if letter not in guessed_letters:
                combined_probs[letter] += prob

        best_guess = max(combined_probs, key=combined_probs.get)
        return best_guess

    def guess(self, word):
        """Return the next letter to guess for the masked ``word``.

        ``start_game`` records the returned letter in ``self.guessed_letters``,
        so this method no longer appends it as well (each guess used to be
        stored twice).
        """
        clean_word = word.replace(" ", "").lower()
        encoded_word = self.encode_word(clean_word)
        # Pad with 27, matching the pad value used for the training data
        # (the default pad value is 0).
        padded_word = pad_sequences([encoded_word], maxlen=self.max_length, padding='post', value=27)
        model_prediction = self.model.predict(padded_word, verbose=0)[0]

        algo_probs = self.algo_guess_prob(clean_word, self.guessed_letters)
        return self.combine_predictions(model_prediction, algo_probs, self.guessed_letters)


    ##########################################################
    # You'll likely not need to modify any of the code below #
    ##########################################################

    def build_dictionary(self, dictionary_file_location):
        """Read the word-list file and return its lines as a list of strings."""
        with open(dictionary_file_location, "r") as text_file:
            return text_file.read().splitlines()

    def start_game(self, practice=True, verbose=True):
        """Play one full game against the server.

        Args:
            practice: Forwarded to the server as the ``practice`` flag.
            verbose: Print progress and server responses when true.

        Returns:
            True if the server reported success, False otherwise (including
            when the game could not be started).
        """
        # reset guessed letters to empty set and current plausible dictionary to the full dictionary
        self.guessed_letters = []
        self.current_dictionary = self.full_dictionary

        # Bound up front so the final return cannot hit an unassigned local
        # when the game is not approved or starts with no tries left.
        status = None

        response = self.request("/new_game", {"practice":practice})
        if response.get('status')=="approved":
            game_id = response.get('game_id')
            word = response.get('word')
            tries_remains = response.get('tries_remains')
            if verbose:
                print("Successfully start a new game! Game ID: {0}. # of tries remaining: {1}. Word: {2}.".format(game_id, tries_remains, word))
            while tries_remains>0:
                # get guessed letter from user code
                guess_letter = self.guess(word)

                # append guessed letter to guessed letters field in hangman object
                self.guessed_letters.append(guess_letter)
                if verbose:
                    print("Guessing letter: {0}".format(guess_letter))

                try:
                    res = self.request("/guess_letter", {"request":"guess_letter", "game_id":game_id, "letter":guess_letter})
                except HangmanAPIError:
                    print('HangmanAPIError exception caught on request.')
                    continue
                except Exception as e:
                    print('Other exception caught on request.')
                    raise e

                if verbose:
                    print("Sever response: {0}".format(res))
                status = res.get('status')
                tries_remains = res.get('tries_remains')
                if status=="success":
                    if verbose:
                        print("Successfully finished game: {0}".format(game_id))
                    return True
                elif status=="failed":
                    reason = res.get('reason', '# of tries exceeded!')
                    if verbose:
                        print("Failed game: {0}. Because of: {1}".format(game_id, reason))
                    return False
                elif status=="ongoing":
                    word = res.get('word')
        else:
            if verbose:
                print("Failed to start a new game")
        return status=="success"

    def my_status(self):
        """Return the server's response to ``/my_status``."""
        return self.request("/my_status", {})

    def request(
            self, path, args=None, post_args=None, method=None):
        """Send one request to the hangman server and return the decoded result.

        Args:
            path: Path appended to the hangman base URL.
            args: Query parameters; the access token is added when set.
            post_args: Form data; when given the request is sent as POST.
            method: HTTP method, defaulting to GET.

        Raises:
            HangmanAPIError: If the response is not usable or contains an
                ``error`` entry.
        """
        if args is None:
            args = dict()
        if post_args is not None:
            method = "POST"

        # Add `access_token` to post_args or args if it has not already been
        # included.
        if self.access_token:
            # If post_args exists, we assume that args either does not exists
            # or it does not need `access_token`.
            if post_args and "access_token" not in post_args:
                post_args["access_token"] = self.access_token
            elif "access_token" not in args:
                args["access_token"] = self.access_token

        time.sleep(0.2)

        num_retry, time_sleep = 50, 2
        for it in range(num_retry):
            try:
                # NOTE(review): verify=False disables TLS certificate checks —
                # confirm this is still required for the server.
                response = self.session.request(
                    method or "GET",
                    self.hangman_url + path,
                    timeout=self.timeout,
                    params=args,
                    data=post_args,
                    verify=False
                )
                break
            except requests.HTTPError as e:
                # NOTE(review): this handler calls e.read(); confirm that
                # requests.HTTPError actually provides it.
                response = json.loads(e.read())
                raise HangmanAPIError(response)
            except requests.exceptions.SSLError as e:
                if it + 1 == num_retry:
                    raise
                time.sleep(time_sleep)

        headers = response.headers
        if 'json' in headers['content-type']:
            result = response.json()
        elif "access_token" in parse_qs(response.text):
            query_str = parse_qs(response.text)
            if "access_token" in query_str:
                result = {"access_token": query_str["access_token"][0]}
                if "expires" in query_str:
                    result["expires"] = query_str["expires"][0]
            else:
                raise HangmanAPIError(response.json())
        else:
            raise HangmanAPIError('Maintype was not text, or querystring')

        if result and isinstance(result, dict) and result.get("error"):
            raise HangmanAPIError(result)
        return result

class HangmanAPIError(Exception):
    """Error raised for failed hangman API calls.

    ``result`` may be a dict in one of several layouts, or any other object.
    The message is taken from the first of ``result["error_description"]``,
    ``result["error"]["message"]`` and ``result["error_msg"]`` that can be
    read; if none can, ``result`` itself becomes the message.

    Attributes:
        result: The raw payload passed in.
        type: ``result["error_code"]`` if readable, else the nested error's
            ``"type"`` (when that layout is used), else ``""``.
        code: The nested error's ``"code"`` when that layout is used, else None.
        message: The extracted message, also passed to ``Exception``.
    """

    def __init__(self, result):
        lookup_failures = (KeyError, TypeError)

        self.result = result
        self.code = None
        try:
            self.type = result["error_code"]
        except lookup_failures:
            self.type = ""

        resolved = False

        # Layout 1: flat "error_description".
        try:
            self.message = result["error_description"]
            resolved = True
        except lookup_failures:
            pass

        # Layout 2: nested {"error": {"message", "code", "type"}}.
        if not resolved:
            try:
                nested = result["error"]
                self.message = nested["message"]
                self.code = nested.get("code")
                if not self.type:
                    self.type = nested.get("type", "")
                resolved = True
            except lookup_failures:
                pass

        # Layout 3: flat "error_msg".
        if not resolved:
            try:
                self.message = result["error_msg"]
                resolved = True
            except lookup_failures:
                pass

        # Nothing recognisable: use the payload itself as the message.
        if not resolved:
            self.message = result

        super().__init__(self.message)

In [None]:
import getpass
import os

# The access token is a credential, so it is not stored in the notebook: read
# it from the HANGMAN_ACCESS_TOKEN environment variable, or prompt for it.
access_token = os.environ.get("HANGMAN_ACCESS_TOKEN") or getpass.getpass("Hangman access token: ")
api = HangmanAPI(model, max_length, char_to_index, access_token=access_token, timeout=2000)


In [None]:
# Play one practice game, then fetch the four counters returned by my_status().
api.start_game(practice=1,verbose=True)
[total_practice_runs,total_recorded_runs,total_recorded_successes,total_practice_successes] = api.my_status() # Get my game stats: (# of tries, # of wins)
# NOTE(review): this division raises ZeroDivisionError if total_practice_runs is 0.
practice_success_rate = total_practice_successes / total_practice_runs
print('run %d practice games out of an allotted 100,000. practice success rate so far = %.3f' % (total_practice_runs, practice_success_rate))

Successfully start a new game! Game ID: 26e5fb2c5843. # of tries remaining: 6. Word: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ .




Guessing letter: s
Sever response: {'game_id': '26e5fb2c5843', 'status': 'ongoing', 'tries_remains': 5, 'word': '_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ '}




Guessing letter: m
Sever response: {'game_id': '26e5fb2c5843', 'status': 'ongoing', 'tries_remains': 5, 'word': '_ m _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ '}




Guessing letter: t
Sever response: {'game_id': '26e5fb2c5843', 'status': 'ongoing', 'tries_remains': 4, 'word': '_ m _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ '}




Guessing letter: p
Sever response: {'game_id': '26e5fb2c5843', 'status': 'ongoing', 'tries_remains': 4, 'word': '_ m _ _ _ _ _ _ _ _ _ _ _ p _ _ _ '}




Guessing letter: u
Sever response: {'game_id': '26e5fb2c5843', 'status': 'ongoing', 'tries_remains': 4, 'word': '_ m _ _ _ _ _ _ _ _ u _ _ p _ _ _ '}




Guessing letter: e
Sever response: {'game_id': '26e5fb2c5843', 'status': 'ongoing', 'tries_remains': 4, 'word': '_ m e _ _ _ _ _ _ e u _ _ p e _ _ '}




Guessing letter: o
Sever response: {'game_id': '26e5fb2c5843', 'status': 'ongoing', 'tries_remains': 4, 'word': '_ m e _ _ _ _ _ o e u _ o p e _ _ '}




Guessing letter: i
Sever response: {'game_id': '26e5fb2c5843', 'status': 'ongoing', 'tries_remains': 4, 'word': '_ m e _ i _ _ _ o e u _ o p e _ _ '}




Guessing letter: r
Sever response: {'game_id': '26e5fb2c5843', 'status': 'ongoing', 'tries_remains': 4, 'word': '_ m e r i _ _ _ o e u r o p e _ _ '}




Guessing letter: a
Sever response: {'game_id': '26e5fb2c5843', 'status': 'ongoing', 'tries_remains': 4, 'word': 'a m e r i _ a _ o e u r o p e a _ '}




Guessing letter: n
Sever response: {'game_id': '26e5fb2c5843', 'status': 'ongoing', 'tries_remains': 4, 'word': 'a m e r i _ a n o e u r o p e a n '}




Guessing letter: l
Sever response: {'game_id': '26e5fb2c5843', 'status': 'ongoing', 'tries_remains': 3, 'word': 'a m e r i _ a n o e u r o p e a n '}




Guessing letter: c




Sever response: {'game_id': '26e5fb2c5843', 'status': 'success', 'tries_remains': 3, 'word': 'a m e r i c a n o e u r o p e a n '}
Successfully finished game: 26e5fb2c5843
run 9 practice games out of an allotted 100,000. practice success rate so far = 0.333


