In [42]:
import heapq
import random


In [43]:
# Initializing list of all possible wordle guesses: one word per line of common.txt.
# Surrounding whitespace (a trailing '\n', or '\r\n' from a Windows-edited file) is
# stripped and blank lines are skipped, so an empty string can never end up in the
# list and later be indexed as if it were a five-letter word.
possible_words = []
with open('common.txt') as file:
    for line in file:
        word = line.strip()
        if word:
            possible_words.append(word)

In [44]:
# This function determines if a guess is valid given the feedback that's been given from guesses till this point
# yellow_letters is a list of (letter, index) tuples for guessed letters that are in the answer but were guessed
# at the wrong position; index is the position where the letter was tried
# green_letters is a list of (letter, index) tuples for guessed letters that are in the correct position,
# for example green_letters = [('h',2), ('e',3)]
# gray_letters is a plain list of letters that must not appear anywhere in the guess
def valid_guess(guess, green_letters, yellow_letters, gray_letters):
    """Return True when ``guess`` is consistent with the collected feedback.

    guess          -- candidate word to test
    green_letters  -- (letter, index) pairs; ``guess`` must have that letter at that index
    yellow_letters -- (letter, index) pairs; ``guess`` must contain the letter, but not at that index
    gray_letters   -- letters that must not occur anywhere in ``guess``
    """
    # Yellow: the letter has to be present somewhere other than where it was tried.
    yellows_ok = all(
        letter in guess and guess[position] != letter
        for letter, position in yellow_letters
    )
    if not yellows_ok:
        return False

    # Green: the letter has to sit exactly where it was confirmed.
    greens_ok = all(guess[position] == letter for letter, position in green_letters)
    if not greens_ok:
        return False

    # Gray: none of the excluded letters may appear at all.
    return not any(letter in guess for letter in gray_letters)
    

In [45]:
# This function provides feedback for a guess given some answer, the notation of the feedback is seen below
# For a given guess such as heart, the notation used throughout the model is as follows
# Each letter is given a score of 0, 1, or 2. 0 means that the letter does not exist in the correct answer. 
# 1 means the letter is yellow: correct letter, wrong spot. 2 means the letter is green: correct letter and correct
# spot. If the answer is harsh and our guess is heart, then the resulting notation would be:
# [('h',2),('e',0),('a',1),('r',1), ('t',0)]
def score_guess(answer, guess):
    """Score the first five letters of ``guess`` against ``answer``.

    Returns a list of five (letter, score) tuples in guess order, where score is
    2 if the letter matches ``answer`` at the same index, 1 if it occurs anywhere
    in ``answer``, and 0 otherwise. Each position is scored independently of the
    others, so repeated letters are not rationed against the answer's letter counts.
    """
    def mark(position):
        letter = guess[position]
        if letter == answer[position]:
            return 2
        return 1 if letter in answer else 0

    return [(guess[position], mark(position)) for position in range(5)]

In [46]:
# This function takes feedback in the above form and converts it into a list of green, yellow, and gray letters
# green letters and yellow letters are stored in tuples of the letter and their index within the word
# gray letters are simply a list of all gray letters
def get_green_yellow_gray_letters(guess_feedback):
    """Split scored feedback into green, yellow and gray groups.

    ``guess_feedback`` is a sequence of (letter, score) entries. Entries scored 2
    go to the green list and entries scored 1 go to the yellow list, both as
    (letter, index) tuples where index is the entry's position in the feedback.
    Every other entry contributes just its letter to the gray list.

    Returns the tuple (green_letters, yellow_letters, gray_letters).
    """
    greens, yellows, grays = [], [], []
    for position, entry in enumerate(guess_feedback):
        letter, score = entry[0], entry[1]
        if score == 2:
            greens.append((letter, position))
            continue
        if score == 1:
            yellows.append((letter, position))
            continue
        grays.append(letter)
    return greens, yellows, grays

In [47]:
# def predict_wordle(answer, starting_word):
#     possible_guesses = possible_words
#     all_guesses = [starting_word]
#     min_guess = starting_word
#     final_answer = answer
#     gray_letters = []
#     while min_guess != final_answer:
#         guess_feedback = score_guess(final_answer,min_guess)
#         green_letters, yellow_letters, current_gray_letters = get_green_yellow_gray_letters(guess_feedback)
#         gray_letters = gray_letters + current_gray_letters
#         new_possible_guesses = []
#         for guess in possible_guesses:
#             if valid_guess(guess, green_letters, yellow_letters, gray_letters):
#                 new_possible_guesses.append(guess)
#         possible_guesses = new_possible_guesses
#         guess_answer_matrix = {}
#         for guess in possible_guesses:
#             guess_answer_matrix[guess] = {}
#             for answer in possible_guesses:
#                 guess_feedback = score_guess(answer,guess)
#                 curr_green_letters, curr_yellow_letters, curr_gray_letters = get_green_yellow_gray_letters(guess_feedback)
#                 valid_guesses = 0
#                 new_possible_guesses = possible_guesses.copy()
#                 new_possible_guesses.remove(guess)
#                 for option in new_possible_guesses:
#                     if valid_guess(option,curr_green_letters,curr_yellow_letters, curr_gray_letters):
#                         valid_guesses += 1
#                 guess_answer_matrix[guess][answer] = valid_guesses
#         min_guess_val = 3000
#         for guess in guess_answer_matrix:
#             max_possible_guesses = max(guess_answer_matrix[guess].values()) 
#             if max_possible_guesses < min_guess_val and guess != min_guess:
#                 min_guess_val = max_possible_guesses
#                 min_guess = guess
#         possible_guesses.remove(min_guess)
#         all_guesses.append(min_guess)

#     return all_guesses
# print(predict_wordle('foyer','slate'))


In [48]:
# This is the main wordle prediction algorithm. It takes in the final_answer we are trying to find, a current working
# list of possible guesses, a list of gray letters, and the current guess we have just made
# It then finds the next guess using a greedy min-max approach 
def predict_wordle_guess(final_answer, possible_guesses, gray_letters, current_guess):
    """Score ``current_guess`` against ``final_answer`` and choose the next guess.

    final_answer     -- the word being searched for (used to generate the feedback)
    possible_guesses -- candidate words still in play before this guess
    gray_letters     -- gray letters accumulated from earlier guesses
    current_guess    -- the guess that was just made

    Returns (new_gray_letters, remaining_guesses, new_guess): the gray letters
    including those from this guess, the candidates consistent with the feedback,
    and the chosen next guess. The caller's lists are not modified.

    The next guess is picked greedily: for every remaining candidate, take the
    largest number of other candidates that could survive its feedback over all
    possible answers, then choose at random among the candidates with the smallest
    such worst case. The tied candidates are printed before returning.

    Raises ValueError if no candidate is consistent with the feedback (for example
    when ``final_answer`` is not in the word list).
    """
    # Getting feedback on our current guess based on the answer we're working towards
    guess_feedback = score_guess(final_answer, current_guess)
    green_letters, yellow_letters, current_gray_letters = get_green_yellow_gray_letters(guess_feedback)
    new_gray_letters = gray_letters + current_gray_letters

    # Based on the feedback of the guess, narrowing down the list of guesses we can choose from
    remaining_guesses = [
        guess for guess in possible_guesses
        if valid_guess(guess, green_letters, yellow_letters, new_gray_letters)
    ]
    if not remaining_guesses:
        raise ValueError(
            'no possible guesses remain after guessing %r; is the answer in the word list?'
            % (current_guess,)
        )

    # Group the candidates by their worst case: key = the largest number of other
    # candidates left standing over all possible answers, value = guesses with that key.
    max_guess_dict = {}
    for guess in remaining_guesses:
        # Every candidate except the guess itself; this does not depend on the
        # answer, so it is built once per guess rather than once per pair.
        other_guesses = remaining_guesses.copy()
        other_guesses.remove(guess)

        # The survivor count is fully determined by the feedback pattern, and many
        # answers produce the same pattern, so each pattern is counted only once.
        survivors_by_feedback = {}
        for answer in remaining_guesses:
            feedback = tuple(score_guess(answer, guess))
            if feedback in survivors_by_feedback:
                continue
            curr_green_letters, curr_yellow_letters, curr_gray_letters = get_green_yellow_gray_letters(feedback)
            survivors_by_feedback[feedback] = sum(
                1 for option in other_guesses
                if valid_guess(option, curr_green_letters, curr_yellow_letters, curr_gray_letters)
            )

        max_possible_guesses = max(survivors_by_feedback.values())
        max_guess_dict.setdefault(max_possible_guesses, []).append(guess)

    # Minimise the worst case; ties are broken at random.
    best_guesses = max_guess_dict[min(max_guess_dict)]
    new_guess = random.choice(best_guesses)
    print(best_guesses)

    return new_gray_letters, remaining_guesses, new_guess



In [49]:
# Play one game: start from a fixed opening word and keep asking the solver for
# the next guess until it matches the answer, printing every guess along the way.
new_guess = 'crane'
answer = 'askew'
gray_letters = []
possible_guesses = possible_words
while True:
    print(new_guess)
    if new_guess == answer:
        break
    gray_letters, possible_guesses, new_guess = predict_wordle_guess(
        answer, possible_guesses, gray_letters, new_guess
    )

print('done')

crane
['tales']
tales
['ashed', 'asked', 'askew']
asked
['askew']
askew
done
