In [1]:
import os
import gzip
import pickle
import math
import random
import numpy as np
import pandas as pd
from scipy.stats import entropy
import altair as alt
from wordfreq import word_frequency
from IPython.display import clear_output

In [2]:
def get_guess_result(answer_word, guess_word):
    """Score ``guess_word`` against ``answer_word`` with Wordle-style feedback.

    Each of the five positions gets one digit:
      * 2 - the guessed letter equals the answer letter at that position,
      * 1 - the guessed letter occurs elsewhere in the answer and that
            occurrence has not already been claimed by another position,
      * 0 - otherwise.

    Returns:
        int: the five digits, read left to right, interpreted as a base-3
        number (so a fully correct guess yields 242).
    """
    positions = range(5)

    # Pass 1: mark exact positional matches.
    marks = [2 if guess_word[pos] == answer_word[pos] else 0 for pos in positions]

    # Tally the answer letters that were not consumed by an exact match.
    remaining = {}
    for pos in positions:
        if marks[pos] == 2:
            continue
        letter = answer_word[pos]
        remaining[letter] = remaining.get(letter, 0) + 1

    # Pass 2: left to right, hand out "present elsewhere" marks while the
    # tally for that letter is not exhausted (handles repeated letters).
    for pos in positions:
        if marks[pos] == 2:
            continue
        letter = guess_word[pos]
        if remaining.get(letter, 0) > 0:
            marks[pos] = 1
            remaining[letter] -= 1

    return int(''.join(map(str, marks)), 3)

######################################################################################################################################################

def display_result(answer_word, former_guesses, former_guess_results, num_choices_left, total_guess_number, result_distribution):
    """Print a six-row progress board for the word currently being solved.

    Reads the notebook-level ``possible_words_list`` to locate ``answer_word``.

    Args:
        answer_word: the word being solved; must be in ``possible_words_list``.
        former_guesses: guesses made so far for this word.
        former_guess_results: base-3 encoded feedback for each guess
            (242 means the guess was fully correct).
        num_choices_left: value printed after each guess (the caller passes
            the number of remaining candidates, or '' for the final row).
        total_guess_number: running total of guesses over the whole run.
        result_distribution: running histogram, printed via ``str``.

    Returns:
        None. Everything is written to stdout.
    """
    tile_for_digit = {'0': '\U00002B1B', '1': '\U0001F7E8', '2': '\U0001F7E9'}
    separator = '-------------------------'

    # The board has six rows, so only the six most recent entries are shown.
    shown_guesses = former_guesses[-6:]
    shown_results = former_guess_results[-6:]
    shown_choices = num_choices_left[-6:]

    # Decode every feedback integer into its zero-padded five-digit base-3 form.
    patterns = [np.base_repr(code, base = 3).zfill(5) for code in shown_results]

    last_result = shown_results[-1]
    answer_position = possible_words_list.index(answer_word)

    # Once the current word is solved it counts towards the average; before
    # that, only the words preceding it in the list do.
    if last_result == 242:
        average_guess = total_guess_number / (answer_position + 1)
    elif total_guess_number == 0:
        average_guess = 0
    else:
        average_guess = total_guess_number / answer_position

    header = (
        '| ' + '{:4.0f}'.format(answer_position + 1)
        + ' of ' + str(len(possible_words_list))
        + ' | word: ' + answer_word + ' |'
    )
    footer = (
        '| total guess: ' + '{:5.0f}'.format(total_guess_number)
        + ' | average guess: ' + '{:5.4f}'.format(average_guess)
        + ' | guess number distribution: ' + str(result_distribution) + ' |'
    )

    print(header)
    print(separator)

    for row in range(6):
        if row >= len(patterns):
            # Pad unused rows so the board keeps a constant height.
            print()
            continue
        tiles = ''.join(tile_for_digit.get(digit, '') for digit in patterns[row][:5])
        print(tiles + '  ' + shown_guesses[row] + '  ' + str(shown_choices[row]))

    print(separator)
    print(footer)

######################################################################################################################################################

def get_next_guess(former_guesses, former_guess_results):
    """Choose the next guess from the feedback received so far.

    Relies on notebook-level state: ``possible_words_list``,
    ``allowed_words_list``, ``possible_words_indices``, ``best_second_guess_dict``
    and ``guess_result_matrix`` (rows looked up by allowed-word index, columns
    by possible-answer index).

    Args:
        former_guesses: words guessed so far, each present in
            ``allowed_words_list``.
        former_guess_results: base-3 encoded feedback for each of those guesses.

    Returns:
        tuple: ``(next_guess_word, number_of_candidate_answers_remaining)``.

    Notes:
        After exactly one guess the answer comes straight from
        ``best_second_guess_dict``, keyed by that guess's feedback.
        NOTE(review): the table's file name suggests it was built for the
        opener 'soare'; confirm before using a different first guess.
    """
    # Start with every candidate answer and keep only those that would have
    # produced the observed feedback for each earlier guess.
    possible_choices = np.arange(len(possible_words_list))
    for guess, result in zip(former_guesses, former_guess_results):
        feedback_row = guess_result_matrix[allowed_words_list.index(guess)]
        possible_choices = np.intersect1d(possible_choices, np.flatnonzero(feedback_row == result))

    num_left = len(possible_choices)

    if len(former_guesses) == 1:
        return best_second_guess_dict[former_guess_results[0]], num_left

    if num_left == 1:
        return possible_words_list[possible_choices[0]], num_left

    # Entropy (bits) of the feedback distribution each allowed word would
    # induce over the remaining candidate answers.
    sub_matrix = guess_result_matrix[:, possible_choices]
    entropy_values = np.array([
        entropy(np.unique(row, return_counts = True)[1], base = 2)
        for row in sub_matrix
    ])
    best_rows = np.flatnonzero(entropy_values == np.max(entropy_values))

    # Among equally informative guesses, prefer one that could itself be the
    # answer. np.isin replaces np.in1d, which is deprecated since NumPy 2.0.
    candidate_rows = np.asarray(possible_words_indices)[possible_choices]
    is_best = np.isin(candidate_rows, best_rows)
    if is_best.any():
        best_next_guess = allowed_words_list[candidate_rows[is_best][0]]
    else:
        best_next_guess = allowed_words_list[np.argmax(entropy_values)]

    return best_next_guess, num_left

######################################################################################################################################################

def demo():
    """Run the solver on every word of ``possible_words_list`` and chart the outcome.

    For each answer the solver opens with 'soare' and keeps asking
    ``get_next_guess`` until the guess equals the answer, redrawing the
    progress board after every guess. When all words are done, a bar chart of
    the guess-count histogram (1..6 and '> 6') is displayed.
    """
    np.set_printoptions(suppress = True)

    # Slots 0..5 count games solved in 1..6 guesses; slot 6 collects the rest.
    result_distribution = np.zeros(7)
    total_guess_number = 0

    for answer_word in possible_words_list:

        former_guesses, former_guess_results, num_choices_left = [], [], []
        guess_number = 0
        guess_word = 'soare'

        while guess_word != answer_word:
            guess_number += 1
            feedback = get_guess_result(answer_word, guess_word)
            former_guesses.append(guess_word)
            former_guess_results.append(feedback)
            guess_word, choices_left = get_next_guess(former_guesses, former_guess_results)
            num_choices_left.append(choices_left)

            clear_output(wait = True)
            display_result(answer_word, former_guesses, former_guess_results, num_choices_left, total_guess_number, result_distribution)

        # Record the final, correct guess (242 is the all-correct feedback).
        former_guesses.append(answer_word)
        former_guess_results.append(242)
        num_choices_left.append('')
        guess_number += 1
        total_guess_number += guess_number
        result_distribution[min(guess_number, 7) - 1] += 1

        clear_output(wait = True)
        display_result(answer_word, former_guesses, former_guess_results, num_choices_left, total_guess_number, result_distribution)

    labels = [str(k) for k in range(1, len(result_distribution))] + ['> 6']
    histogram = pd.DataFrame({'number of guesses': labels, 'count': result_distribution})
    chart = (
        alt.Chart(histogram)
        .mark_bar()
        .encode(alt.X('number of guesses:O'), alt.Y('count:Q'), tooltip = ['number of guesses', 'count'])
        .properties(width = 500, height = 200)
        .configure_axisX(labelAngle = 0)
    )
    display(chart)


In [3]:
# Candidate answers: one word per line; only the first five characters of
# each line are kept.
with open('possible_words.txt') as file:
    possible_words = list(file)
possible_words_list = [line[:5] for line in possible_words]

# Every word accepted as a guess, read the same way.
with open('allowed_words.txt') as file:
    allowed_words = list(file)
allowed_words_list = [line[:5] for line in allowed_words]

######################################################################################################################################################

# Feedback table: entry [g][a] is get_guess_result(allowed_words_list[a],
# allowed_words_list[g]), i.e. the feedback for guessing word g when the
# answer is word a. Built once and cached on disk because it covers every
# pair of allowed words.
if 'guess_result_matrix.npy' not in os.listdir():
    # Convert to an ndarray before saving and slicing: a plain list of lists
    # cannot be indexed with the fancy index used below, so a cold-cache run
    # would otherwise fail with a TypeError.
    guess_result_matrix = np.array([
        [get_guess_result(answer, guess) for answer in allowed_words_list]
        for guess in allowed_words_list
    ])
    np.save('guess_result_matrix.npy', guess_result_matrix)
else:
    guess_result_matrix = np.load('guess_result_matrix.npy')

# Row/column position of every candidate answer inside allowed_words_list.
possible_words_indices = [allowed_words_list.index(word) for word in possible_words_list]

# Keep all guess rows but only the columns that correspond to candidate answers.
guess_result_matrix = guess_result_matrix[:, possible_words_indices]

######################################################################################################################################################

# Lookup table: feedback received for the opener 'soare' -> best second guess.
# Computed once and cached on disk.
if 'best_second_guess_dict_v3_soare.pkl' not in os.listdir():

    soare_row = guess_result_matrix[allowed_words_list.index('soare')]
    unique, counts = np.unique(soare_row, return_counts = True)
    answer_rows = np.asarray(possible_words_indices)

    best_second_guess_array = []
    for possible_result in unique:

        # Candidate answers that would produce this feedback for 'soare'.
        possible_choices = np.where(soare_row == possible_result)[0]

        if len(possible_choices) == 1:
            best_second_guess = possible_words_list[possible_choices[0]]
        else:
            # Entropy (bits) of the feedback distribution each allowed word
            # would induce over the remaining candidates.
            sub_matrix = guess_result_matrix[:, possible_choices]
            entropy_value_array = np.array([
                entropy(np.unique(row, return_counts = True)[1], base = 2)
                for row in sub_matrix
            ])
            argmax = np.flatnonzero(entropy_value_array == np.max(entropy_value_array))

            # Among equally informative guesses, prefer one that could itself
            # be the answer. np.isin replaces the deprecated np.in1d.
            candidate_rows = answer_rows[possible_choices]
            is_best = np.isin(candidate_rows, argmax)
            if is_best.any():
                best_second_guess = allowed_words_list[candidate_rows[is_best][0]]
            else:
                best_second_guess = allowed_words_list[np.argmax(entropy_value_array)]

        best_second_guess_array.append(best_second_guess)

    best_second_guess_dict = dict(zip(unique, best_second_guess_array))
    # Use a context manager so the cache file is flushed and closed promptly.
    with open('best_second_guess_dict_v3_soare.pkl', 'wb') as cache_file:
        pickle.dump(best_second_guess_dict, cache_file)

else:
    # NOTE: unpickling can execute arbitrary code; only load a cache file that
    # this notebook produced itself.
    with open('best_second_guess_dict_v3_soare.pkl', 'rb') as cache_file:
        best_second_guess_dict = pickle.load(cache_file)

In [4]:
# Entropy (in bits) of the feedback distribution that each allowed word
# produces as an opening guess over all candidate answers.
first_guess_entropy_array = [
    entropy(np.unique(feedback_row, return_counts = True)[1], base = 2)
    for feedback_row in guess_result_matrix
]

# Show the allowed word with the highest opening-guess entropy.
allowed_words_list[np.argmax(first_guess_entropy_array)]

'soare'

In [5]:
demo()

| 2309 of 2309 | word: zonal |
-------------------------
⬛🟩🟨⬛⬛  soare  21
🟨⬛⬛🟩🟨  liman  2
⬛🟩🟩🟩🟩  tonal  1
🟩🟩🟩🟩🟩  zonal  


-------------------------
| total guess:  7998 | average guess: 3.4638 | guess number distribution: [   0.   45. 1212.  989.   62.    1.    0.] |
