# Information Theory
## September 27th, 2022
### Overview: Maximizing entropy in order to most efficiently solve the game Wordle

In [1]:
import numpy as np
import wordle

In [2]:
# Problem 1
def get_guess_result(true_word, guess):
    """
    Returns an array containing the result of a guess, with the return values as follows:
        2 - correct location of the letter
        1 - incorrect location but present in word
        0 - not present in word
    For example, if the true word is "boxed" and the provided guess is "excel", the 
    function should return [0,1,0,2,0].
    
    Arguments:
        true_word (string) - the secret word
        guess (string) - the guess being made
    Returns:
        result (array of integers) - the result of the guess, as described above
    """
    #cast words to arrays
    truth = np.array(list(true_word))
    garray = np.array(list(guess))
    
    #will use indices for masking
    inds = np.arange(5)
    
    #first find all exact matches
    mask = truth == garray
    result = 2*mask
    
    #if this guess was correct, just return
    if all(mask): return result
    
    #remaining letters of true word
    rem_truth = truth[~mask]
    
    #next fill in 1s
    #for each character and its corresponding index in the guess array
    for i, char in enumerate(garray):
        #if the index is one of the non-matching indices 
         #and the corresponding character is in the non-matched portion of the true word, 
            #set result at that index to 1
        if i in inds[~mask] and char in rem_truth: 
            #make 'yellow' in wordle context
            result[i] = 1
            #remove char from the remaining letters of truth
            ind = np.where(rem_truth==char)[0][0]
            rem_truth = np.delete(rem_truth, ind)
            
    #in the above we do not want to check if the character is in a[~mask] bc... idr
            
    return result

In [3]:
# Problem 2
def load_words(filen):
    """
    Loads all of the words from the given file, ensuring that they 
    are formatted correctly.
    """
    with open(filen, 'r') as file:
        # Get all 5-letter words
        words = [line.strip() for line in file.readlines() if len(line.strip()) == 5]
    return words

In [4]:
possible_words = load_words('possible_words.txt')
allowed_words = load_words('allowed_words.txt')

## The following 4 cells create, save, and load a file that is much too large to include in this upload.

In [5]:
def get_all_guess_results(possible_words, allowed_words):
    """
    Calculates the result of making every guess for every possible secret word
    
    Arguments:
        possible_words (list of strings)
            A list of all possible secret words
        allowed_words (list of strings)
            A list of all allowed guesses
    Returns:
        ((n,m,5) ndarray) - the results of each guess for each secret word,
            where n is the the number
            of allowed guesses and m is number of possible secret words.
    """
    #initialize the final 3d array
    ARRAY = []
    
    #for each word allowed to be guessed
    for allowed_word in allowed_words:
        #initialize intermediate 2d array
        array = []
        
        #for each word that could be the secret word
        for poss_word in possible_words:
            #get the guess result, append to 2d array
            array.append( get_guess_result(poss_word,allowed_word))
            
        #append 2d array to 3d array
        ARRAY.append(np.array(array))
            
    return np.array(ARRAY)

In [6]:
#created_big = get_all_guess_results(possible,allowed)

In [7]:
#np.save('all_guess_results',created_big)

In [8]:
#big = np.load('all_guess_results.npy')

In [9]:
# Problem 3
def compute_highest_entropy(all_guess_results, allowed_words):
    """
    Compute the entropy of each guess.
    
    Arguments:
        all_guess_results ((n,m,5) ndarray) - the output of the function
            from Problem 2, containing the results of each 
            guess for each secret word, where n is the the number
            of allowed guesses and m is number of possible secret words.
        allowed_words (list of strings) - list of the allowed guesses
    Returns:
        (string) The highest-entropy guess
        (int) Index of the highest-entropy guess
    """
    #make an array of 3^i for each index in [0,4]
    powers = np.array([3**i for i in range(5)])
    
    #get base 3 interpretations of all guess results
    base3 = np.sum(powers*all_guess_results,axis=2)
    
    #init list to hold entropies
    entropies = []
    
    #for each allowed word that's been converted to base3
    for row in base3:
        #get the unique values and the number of their occurrences
        values, counts = np.unique(row,return_counts=True)
        
        #divide all the counts by the total number of counts
        fracs = counts/counts.sum()
        #get an array of negative log_2
        logs = -np.log2(fracs)
        
        #calculate the entropy, append
        entropy = (fracs*logs).sum()
        entropies.append(entropy)
    
    #get index of max entropy
    argmax = np.argmax(entropies)
    
    return allowed_words[argmax], argmax

### The highest entropy word to initially guess is "soare"

In [11]:
word, arg = compute_highest_entropy(big, allowed_words)
word

'soare'

In [12]:
# Problem 4
def filter_words(all_guess_results, possible_words, guess_idx, result):
    """
    Create a function that filters the list of possible words after making a guess.
    Since we already computed the result of all guesses for all possible words in 
    Problem 2, we will use this array instead of recomputing the results.
    
	Return a filtered list of possible words that are still possible after 
    knowing the result of a guess. Also return a filtered version of the array
    of all guess results that only contains the results for the secret words 
    still possible after making the guess. This array will be used to compute 
    the entropies for making the next guess.
    
    Arguments:
        all_guess_results (3-D ndarray)
            The output of Problem 2, containing the result of making
            any allowed guess for any possible secret word
        possible_words (list of str)
            The list of possible secret words
        guess_idx (int)
            The index of the guess that was made in the list of allowed guesses.
        result (tuple of int)
            The result of the guess
    Returns:
        (list of str) The filtered list of possible secret words
        (3-D ndarray) The filtered array of guess results
    """
    #access the big array at the index of the guessed word: is now a 2d matrix of results with each possible word
    all_poss = all_guess_results[guess_idx]
    
    #make an array of 3^i for each index in [0,4]
    powers = np.array([3**i for i in range(5)])
    
    #get indices of matched rows
    inds = np.argwhere(np.sum(all_poss*powers,axis=1) == np.sum(result*powers)).flatten()
    
    
    return np.array(possible_words)[inds].tolist(), all_guess_results[:,inds]

In [13]:
# Problem 5
def play_game_naive(game, all_guess_results, possible_words, allowed_words, word=None, display=False,):
    """
    Plays a game of Wordle using the strategy of making guesses at random.
    
    Return how many guesses were used.
    
    Arguments:
        game (wordle.WordleGame)
            the Wordle game object
        all_guess_results ((n,m,5) ndarray)
            an array as outputted by problem 2 containing the results of every guess for every secret word.
        possible_words (list of str)
            list of possible secret words
        allowed_words (list of str)
            list of allowed guesses
        
        word (optional)
            If not None, this is the secret word; can be used for testing. 
        display (bool)
            If true, output will be printed to the terminal by the game.
    Returns:
        (int) Number of guesses made
    """
    # Initialize the game
    game.start_game(word=word, display=display)
    
    while not game.game_finished:
        guess = input('input guess: ')
        
        if guess == 'random':
            guess = np.random.choice(allowed_words)
        game.make_guess(guess,display=True)
        
    return game.guess_ct

In [14]:
game = wordle.WordleGame()

In [15]:
# Problem 6
def play_game_entropy(game, all_guess_results, possible_words, allowed_words, word=None, display=False):
    """
    Plays a game of Wordle using the strategy of guessing the maximum-entropy guess.
    
    Return how many guesses were used.
    
    Arguments:
        game (wordle.WordleGame)
            the Wordle game object
        all_guess_results ((n,m,5) ndarray)
            an array as outputted by problem 2 containing the results of every guess for every secret word.
        possible_words (list of str)
            list of possible secret words
        allowed_words (list of str)
            list of allowed guesses
        
        word (optional)
            If not None, this is the secret word; can be used for testing. 
        display (bool)
            If true, output will be printed to the terminal by the game.
    Returns:
        (int) Number of guesses made
    """
    # Initialize the game
    game.start_game(word=word, display=display)
    
    #copy all these results because they need to be modified
    guess_rslts = all_guess_results.copy()
    poss_words = possible_words.copy()
    allow_words = allowed_words.copy()
    
    #while the game is still going
    while not game.game_finished:   
        guess = input("Type 'e' for entropy based guess: ")
        if guess == "e":
            #if we know the word, guess it
            if len(poss_words) == 1:
                guess = poss_words[0]
                
            #otherwise guess highest entropy word
            else:
                #if we've already made a guess and got its result, filter
                if game.guess_ct > 0:
                    poss_words, guess_rslts = filter_words(guess_rslts, poss_words, guess_idx, result)
                
                #get guess using highest entropy
                guess, guess_idx = compute_highest_entropy(guess_rslts, allow_words)

        result = game.make_guess(guess)[0]
    return game.guess_ct

In [19]:
play_game_entropy(game,big,possible_words,allowed_words,word='alert',display=True)

Type 'e' for entropy based guess:  e


      ---+++---
 s  o -a-+r+-e-
      ---+++---


Type 'e' for entropy based guess:  e


++++++---   ---
+a++l+-t- a -r-
++++++---   ---


Type 'e' for entropy based guess:  e


+++      ---   
+a+ a  h -e- d 
+++      ---   


Type 'e' for entropy based guess:  e


+++++++++++++++
+a++l++e++r++t+
+++++++++++++++
Congratulations!
Number of guesses: 4


4