In [1]:
import collections
import os
import random

from tqdm import tqdm

import utils
from deeplearning.train import dl_model

# Select the GPU the process may see. An existing CUDA_VISIBLE_DEVICES value
# (set by the shell or a job scheduler) is respected; device "3" is only the
# fallback when nothing was configured.
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "3")

In [2]:
class HangmanLocal(object):
    """Offline Hangman simulator used to evaluate the guessing strategy.

    The strategy in ``guess`` combines three signals: a per-word-length
    opening order (``utils.LETTER_ORDER_DICT``), n-gram scores
    (``utils.get_n_gram_prob``) and the letter scores returned by
    ``dl_model.predict``. ``start_game`` plays one complete game against a
    known target word with a fixed budget of six failed tries.
    """

    def __init__(self, access_token=None, session=None, timeout=None):
        """Load the training dictionary, the n-gram tables and the model.

        Args:
            access_token: Accepted but not used by this class.
            session: Accepted but not used by this class.
            timeout: Accepted but not used by this class.

        NOTE(review): the three parameters are presumably kept so the class
        can be swapped with an API-backed client — confirm.
        """
        self.guessed_letters = []
        self.misses = []

        full_dictionary_location = "./dataset/words_250000_train.txt"
        self.full_dictionary = self.build_dictionary(full_dictionary_location)
        # (letter, count) pairs over every character of the dictionary, most frequent first.
        self.full_dictionary_common_letter_sorted = collections.Counter("".join(self.full_dictionary)).most_common()
        self.n_grams = utils.build_n_gram_from_file(full_dictionary_location)
        self.current_dictionary = []

        # NOTE(review): 'test_one' is passed through to dl_model unchanged; its
        # meaning is defined by deeplearning.train — confirm there.
        self.dl_model = dl_model('test_one')

    def guess(self, word):  # word input example: "_ p p _ e "
        """Return the next letter to guess for the current board.

        Args:
            word: Board string in which every position is followed by a
                space and hidden positions are "_", e.g. "_ p p _ e ".

        Returns:
            A single lowercase letter. Letters that were already guessed or
            are already revealed are never returned while at least one other
            letter is available.
        """
        # Every second character is a board position; "." marks a hidden one.
        clean_word = word[::2].replace("_", ".")
        len_word = len(clean_word)
        len_right_letters = len_word - clean_word.count('.')

        # Nothing revealed yet: follow the opening order for this word length.
        if len_right_letters == 0 and len_word in utils.LETTER_ORDER_DICT:
            for letter in utils.LETTER_ORDER_DICT[len_word]:
                if letter not in self.guessed_letters:
                    return letter

        ngram_probs = utils.get_n_gram_prob(self.n_grams, clean_word, self.guessed_letters)
        # Deep Learning way; only the per-letter scores are used below.
        _, nn_probs = self.dl_model.predict(clean_word, self.misses)

        # Letters that must not be proposed again.
        excluded = set(self.misses) | set(clean_word) | set(self.guessed_letters)
        first_letter = ord("a")

        nn_probs = [0.0 if chr(i + first_letter) in excluded else p
                    for i, p in enumerate(nn_probs)]
        # Renormalise over the remaining letters. The sum is computed once, and
        # skipped when it is zero so that no division by zero / NaN can occur.
        total = sum(nn_probs)
        if total > 0:
            nn_probs = [p / total for p in nn_probs]

        # Element-wise sum; this relies on ngram_probs being an array type
        # (a plain list would be concatenated instead).
        final_probs = nn_probs + ngram_probs

        # Pick the best-scoring letter among those still available, so a
        # non-zero n-gram score on a used letter can never cause a repeat.
        candidates = [i for i in range(len(nn_probs))
                      if chr(i + first_letter) not in excluded]
        if candidates:
            best_index = max(candidates, key=lambda i: final_probs[i])
        else:
            # Every letter is excluded (not reachable in a live game).
            best_index = int(final_probs.argmax())

        return chr(best_index + first_letter)

    def build_dictionary(self, dictionary_file_location):
        """Read a word list (one word per line) and return it as a list of strings."""
        # The context manager closes the file even if reading fails.
        with open(dictionary_file_location, "r") as text_file:
            return text_file.read().splitlines()

    def start_game(self, word_to_guess, practice=True, verbose=True):
        """Play one game against ``word_to_guess``.

        Args:
            word_to_guess: The hidden target word.
            practice: Accepted but not used by this local simulator.
            verbose: When true, print the progress of the game.

        Returns:
            True if every letter was revealed before six tries were used up,
            False otherwise. A wrong guess, or a guess of a letter that was
            already guessed in this game, costs one try.
        """
        # reset guessed letters to empty set and current plausible dictionary to the full dictionary
        self.guessed_letters = []
        self.misses = []
        self.current_dictionary = self.full_dictionary

        game_id = 1
        word = "_ " * len(word_to_guess)
        letter_remains = len(word_to_guess)
        tries_remains = 6  # Set a fixed number of tries
        if verbose:
            print("Successfully start a new game! Game ID: {0}. # of tries remaining: {1}. Word: {2}.".format(game_id, tries_remains, word))

        while tries_remains > 0:
            # get guessed letter from user code
            guess_letter = self.guess(word)

            # A letter that was already played must not be scored again:
            # re-revealing it would decrement letter_remains a second time
            # and could report a win for an unfinished word.
            is_repeat = guess_letter in self.guessed_letters

            # append guessed letter to guessed letters field in hangman object
            self.guessed_letters.append(guess_letter)
            if verbose:
                print("Guessing letter: {0}".format(guess_letter))

            if guess_letter in word_to_guess and not is_repeat:
                # Replace the underline with the guessed letter
                for i in range(len(word_to_guess)):
                    if word_to_guess[i] == guess_letter:
                        letter_remains -= 1
                        word = word[:2 * i] + guess_letter + word[2 * i + 1:]
                if verbose:
                    print("Successfully guessed letter: {0}. Word: {1}".format(guess_letter, word))
                if letter_remains == 0:
                    if verbose:
                        print("Successfully finished game: {0}".format(game_id))
                    return True
            else:
                # Only letters that are absent from the word count as misses.
                if guess_letter not in word_to_guess:
                    self.misses.append(guess_letter)
                tries_remains -= 1  # Decrease the number of tries if the guess was wrong
                if verbose:
                    print("Failed. # of tries remaining: {0}".format(tries_remains))

        # The loop only ends once the tries are exhausted.
        if verbose:
            print("Failed game: {0}. Because of: # of tries exceeded!".format(game_id))
        return False

In [3]:
# Build the local simulator; the constructor reads the training dictionary,
# builds the n-gram tables and instantiates the model.
game = HangmanLocal()
# Alternative kept for reference (presumably the API-backed client — confirm):
# from Hangman import HangmanAPI
# game = HangmanAPI()

Architecture: GRU_4_1024_26
models/GRU_4_1024_26/best_GRU_4_1024.pth
Loaded pretrained model from: models/GRU_4_1024_26/best_GRU_4_1024.pth


In [4]:
# Smoke test: play one verbose game on a known word.
# start_game returns True on a win and False otherwise.
word = "claim"
game.start_game(word)

Successfully start a new game! Game ID: 1. # of tries remaining: 6. Word: _ _ _ _ _ .
Guessing letter: s
Failed. # of tries remaining: 5
Guessing letter: e
Failed. # of tries remaining: 4
Guessing letter: a
Successfully guessed letter: a. Word: _ _ a _ _ 
Guessing letter: r
Failed. # of tries remaining: 3
Guessing letter: l
Successfully guessed letter: l. Word: _ l a _ _ 
Guessing letter: c
Successfully guessed letter: c. Word: c l a _ _ 
Guessing letter: m
Successfully guessed letter: m. Word: c l a _ m 
Guessing letter: i
Successfully guessed letter: i. Word: c l a i m 
Successfully finished game: 1


True

In [5]:
# Held-out evaluation: estimate the win rate on words that are not part of
# the training dictionary.
#
# One-off preprocessing that produced words_not_contained.txt (kept for provenance):
# # load words randomly from the words.txt
# with open("words_alpha.txt", "r") as text_file:
#     words = text_file.read().splitlines()
# print("Total words: {0}".format(len(words)))
#
# # remove words that contain in words_250000_train.txt
# with open("words_250000_train.txt", "r") as text_file:
#     full_dictionary = text_file.read().splitlines()
# words = [word for word in words if word not in full_dictionary]
#
# print("Total words: {0}".format(len(words)))
# # save to words_not_contained.txt
# with open("words_not_contained.txt", "w") as text_file:
#     for word in words:
#         text_file.write(word + "\n")

# Evaluation settings.
HELD_OUT_WORDS_PATH = "./dataset/words_not_contained.txt"
MIN_WORD_LENGTH = 6     # words of five characters or fewer are dropped
N_ROUNDS = 5            # independent rounds to average over
GAMES_PER_ROUND = 1000  # games played in each round
SEED = 0                # fixed seed so the sampled words are reproducible

# load words from words_not_contained.txt
with open(HELD_OUT_WORDS_PATH, "r") as text_file:
    words = text_file.read().splitlines()

# keep only words longer than five characters
words = [word for word in words if len(word) >= MIN_WORD_LENGTH]
if not words:
    raise ValueError("No evaluation words left after filtering by length.")

print("Total words: {0}".format(len(words)))

# A dedicated generator keeps the evaluation independent of any other use of
# the global random state.
rng = random.Random(SEED)

final_rates = []
# run five times
for round_index in range(N_ROUNDS):
    print("Running {0}th time...".format(round_index + 1))
    # shuffle the words; the first test_times entries form this round's sample
    rng.shuffle(words)

    # never index past the end of a short word list
    test_times = min(GAMES_PER_ROUND, len(words))

    # start the game
    win = 0
    for game_index in tqdm(range(test_times)):
        if game.start_game(words[game_index], verbose=False):
            win += 1
    print("Winning rate: {0}/{1}={2}".format(win, test_times, win / test_times))
    final_rates.append(win / test_times)
print("Final winning rate: {0}".format(sum(final_rates) / len(final_rates)))

Total words: 172248
Running 1th time...


100%|██████████| 1000/1000 [00:42<00:00, 23.36it/s]


Winning rate: 729/1000=0.729
Running 2th time...


100%|██████████| 1000/1000 [00:43<00:00, 23.09it/s]


Winning rate: 722/1000=0.722
Running 3th time...


100%|██████████| 1000/1000 [00:43<00:00, 22.83it/s]


Winning rate: 732/1000=0.732
Running 4th time...


100%|██████████| 1000/1000 [00:43<00:00, 23.13it/s]


Winning rate: 719/1000=0.719
Running 5th time...


100%|██████████| 1000/1000 [00:42<00:00, 23.27it/s]

Winning rate: 751/1000=0.751
Final winning rate: 0.7305999999999999





# Results

<!-- make table -->
| Model | Score | Running time |
|-------|--------------|--------|
|baseline| 21.04 | - |
|del_len_match| 35.00 | - |
|re.search| 38.16 | 3:00 |
|first order| 38.04 | 1:33 |
|NLP | 56.1 | 0:01 |
|NLP+ | 61.38 | 0:01 |
|NLP + first order | 62.28 | 0:01 |
|NLP + 2/4 | 64.32 | 0:07 |
|NLP + 2/4 + first order | 62.52 | 0:07 |
|NLP + 2/4 + 2/5 | 65.04 | 0:13 |
|NLP + 2/4 + 2/5 + first | 66.16 | 0:12 |
|NLP + 2/4 + 2/5 + 3/5 + first | 65.90 | 2:18 |
|GRU-2-512 | 48.88 | 1:38 |
|GRU-2-1024 + interval-1| 64.90 | 0:40 |
|GRU-4-1024 + interval-1| 69.36 | 0:40 |

# References
- https://github.com/dwyl/english-words (source of `words_alpha.txt`, from which the held-out test words were drawn)