# Boggle Word Finder

In [30]:
import random
import numpy as np
import urllib.request

## Getting a Boggle Board

In [2]:
chars = 'abcdefghijklmnopqrstuvwxyz'

In [3]:
def get_random_char():
    return chars[random.randint(1,25)]

In [7]:
def get_random_boggle_board(x_dim=4, y_dim=4):
    return [[get_random_char() for i in range(x_dim)] for j in range(y_dim)]

In [8]:
boggle_board = get_random_boggle_board(); boggle_board

[['t', 'j', 'n', 'r'],
 ['q', 'o', 'l', 'k'],
 ['p', 'i', 'e', 'h'],
 ['p', 'f', 'q', 's']]

## Building a Dictionary Trie

The boggle board will use a trie as an index for quickly checking the words availble in the board

In [11]:
term_char = ''

Here is an example of what the trie looks like:

In [20]:
dict_trie = {'a': {term_char: True, 
                   'n': {term_char: True}, 
                   'b': {term_char: True}},
             }
dict_trie['a']['n'][term_char]

True

In [18]:
def build_dict_trie(dict_words):
    dict_trie = {}
    for word in dict_words:
        trie_node = dict_trie
        for char in word:
            if char not in trie_node:
                trie_node[char] = {}
            trie_node = trie_node[char]
        trie_node[term_char] = True
    return dict_trie            

In [34]:
url_for_10000_most_common_en_words = 'https://raw.githubusercontent.com/first20hours/google-10000-english/master/google-10000-english-no-swears.txt'
en_words = [];
with urllib.request.urlopen(url_for_10000_most_common_en_words) as response:
    en_words = response.read().decode('utf-8').split('\n')

In [37]:
en_words[:10]

['the', 'of', 'and', 'to', 'a', 'in', 'for', 'is', 'on', 'that']

It turns out there are a ton of weird short words in that dataset. I'm assuming they must be acronyms or IM shorthand or something. I wanted to filter down to a smaller set of 1 and 2 character words:

In [55]:
allowed_one_and_two_char_words = [ # 2 char words taken from: http://wordfinder.yourdictionary.com/letter-words/2
    'a', 
    'i', 
    "jo",
    "za",
    "qi",
    "xu",
    "xi",
    "ox",
    "ax",
    "ex",
    "mm",
    "hm",
    "by",
    "my",
    "up",
    "mu",
    "ki",
    "um",
    "ka",
    "fe",
    "fa",
    "of",
    "ma",
    "op",
    "pe",
    "pa",
    "if",
    "pi",
    "me",
    "ow",
    "am",
    "mi",
    "om",
    "ba",
    "aw",
    "we",
    "wo",
    "uh",
    "be",
    "ab",
    "em",
    "bi",
    "ef",
    "mo",
    "bo",
    "nu",
    "ag",
    "oh",
    "go",
    "ay",
    "eh",
    "ah",
    "ye",
    "ya",
    "oy",
    "un",
    "he",
    "hi",
    "ho",
    "sh",
    "ha",
    "yo",
    "us",
    "ut",
    "on",
    "li",
    "id",
    "in",
    "an",
    "de",
    "do",
    "en",
    "el",
    "ed",
    "al",
    "la",
    "lo",
    "ne",
    "ad",
    "no",
    "od",
    "na",
    "as",
    "ai",
    "ar",
    "ae",
    "er",
    "at",
    "si",
    "it",
    "is",
    "aa",
    "os",
    "oi",
    "or",
    "et",
    "re",
    "ta",
    "ti",
    "so",
    "oe",
    "es",
    "to"
]

In [61]:
en_words_filtered_short_words = [w for w in en_words if len(w) > 2] + allowed_one_and_two_char_words

In [62]:
dict_trie = build_dict_trie(en_words_filtered_short_words)

## Get all Words Available in a Boggle Board

In [224]:
def get_boggle_words(boggle_board):
    words = set()
    board_shape = np.shape(boggle_board)
    for j,i in np.ndindex(board_shape):
        next_char = boggle_board[j][i]
        words |= get_boggle_words_from_index(boggle_board, next_char, ((i,j),), dict_trie[next_char], board_shape)
    return words

In [225]:
def is_valid_new_index(new_idx, idxs, board_shape):
    i,j = new_idx
    y_dim, x_dim = board_shape
    return 0 <= i < x_dim and 0 <= j < y_dim and new_idx not in idxs

In [226]:
def get_boggle_words_from_index(boggle_board, prev_chars, idxs, trie_node, board_shape):
    i,j = idxs[-1]
    words = set()
    if term_char in trie_node:
        words.add(prev_chars)

    for x in [-1,0,1]:
        for y in [-1,0,1]:
            new_i = i + x
            new_j = j + y
            new_idx = (new_i, new_j);
            if is_valid_new_index(new_idx, idxs, board_shape):
                new_idxs = idxs + (new_idx,)
                next_char = boggle_board[new_j][new_i]
                if next_char in trie_node:
                    words |= get_boggle_words_from_index(boggle_board, 
                                                         prev_chars+next_char, 
                                                         new_idxs, 
                                                         trie_node[next_char], 
                                                         board_shape)
    return words

In [227]:
get_boggle_words([['a']])

{'a'}

In [228]:
get_boggle_words([['a', 'n']])

{'a', 'an', 'na'}

In [229]:
get_boggle_words([['a', 'z'],
                  ['b', 'n']])

{'a', 'ab', 'an', 'ba', 'ban', 'na', 'nba', 'za'}

In [251]:
boggle_board = get_random_boggle_board() 
boggle_board, get_boggle_words(boggle_board)

([['n', 'e', 'b', 'z'],
  ['l', 'c', 'k', 't'],
  ['x', 'r', 'g', 'v'],
  ['b', 'y', 'm', 'd']],
 {'be',
  'ben',
  'by',
  'crm',
  'cry',
  'el',
  'en',
  'gtk',
  'gym',
  'ken',
  'len',
  'my',
  'ne',
  'nec',
  'neck'})

## What About an Object Oriented Approach

In [150]:
class BoggleNode:
    def __init__(self, char=None):
        self.char = char or get_random_char();
        self.adjacent_nodes = [];
        
    def add_adjacent_node(self, node):
        self.adjacent_nodes.append(node)

In [249]:
class BoggleBoard:
    def __init__(self, shape=(4,4)):
        self.shape = shape
        self.board = [[BoggleNode() for i in range(shape[1])] for j in range(shape[0])]
        self._initialize_node_links_()
    
    def _initialize_node_links_(self):
        for j,i in np.ndindex(self.shape):
            for offset_j, offset_i in np.ndindex((3,3)):
                j2 = j - 1 + offset_j
                i2 = i - 1 + offset_i
                is_self_node = j2 == j and i2 == i
                if (not is_self_node) and 0 <= j2 < self.shape[0] and 0 <= i2 < self.shape[1]:
                    self.board[j][i].add_adjacent_node(self.board[j2][i2])
                    
    def find_all_words(self):
        words = set()
        for j,i in np.ndindex(self.shape):
            node = self.board[j][i]
            words |= self._find_words_from_node_(node, dict_trie[node.char], (node,));
        return words
        
    def _find_words_from_node_(self, node, trie_node, prev_nodes):
        words = set()
        if term_char in trie_node:
            words.add(''.join([node.char for node in prev_nodes]))
        for next_node in node.adjacent_nodes:
            next_char = next_node.char
            if next_char in trie_node:
                words |= self._find_words_from_node_(next_node, trie_node[next_char], prev_nodes + (next_node,))
        return words
    
    def __repr__(self):
        return '\n'.join([' '.join([self.board[j][i].char for i in range(self.shape[1])]) for j in range(self.shape[0])])

In [250]:
b = BoggleBoard(shape=(4,4))
b,b.find_all_words()

(p p d r
 i n g e
 c w z w
 d m e l,
 {'de',
  'der',
  'dpi',
  'drew',
  'ed',
  'edge',
  'el',
  'em',
  'er',
  'gdp',
  'greg',
  'grew',
  'i',
  'in',
  'inc',
  'ind',
  'ing',
  'me',
  'mel',
  'mem',
  'pgp',
  'pi',
  'pic',
  'picnic',
  'pin',
  'ping',
  're',
  'red',
  'reg',
  'we',
  'wed',
  'were',
  'win',
  'wind',
  'wing'})

In [90]:
iter = np.ndindex(np.shape([[0],[0]]))
next(iter)

(0, 0)

In [106]:
np.zeros((3,2))

array([[0., 0.],
       [0., 0.],
       [0., 0.]])

In [125]:
list(np.ndindex((3,3))).remove((1,1))

In [133]:
test = [(1,1)]
test.remove((1,1)); test

[]

In [143]:
1 < 4 < 5

True

In [231]:
''.join(['a', 'b'])

'ab'