In [1]:
%load_ext autoreload
%autoreload 2

## Step 1: verify correctness, or at least that the two generators yield identical results

In [2]:
from collections import namedtuple

from pagan_army.anagram_handler import AnagramHandler
from pagan_army.hash_anagram_generator import HashAnagramGenerator
from pagan_army.unsorted_trie_anagram_generator import UnsortedTrieAnagramGenerator


TestDeets = namedtuple('TestDeets', ['word_length', 'anagramee', 'expected_count'])

# Each case pairs a minimum word length and a phrase with the number of
# anagrams that both generators are expected to produce for it.
tests = [
    TestDeets(word_length=2, anagramee='my game', expected_count=7),
    TestDeets(word_length=2, anagramee='is like the', expected_count=81),
    TestDeets(word_length=6, anagramee='pythagorean theorem', expected_count=54),
    TestDeets(word_length=5, anagramee='there is no answer', expected_count=1177)
]


def collect_anagrams(generator_class, test_deets):
    """Return every anagram an AnagramHandler yields for one test case.

    Args:
        generator_class: the anagram generator class to instantiate; it is
            called with ``minimum_word_length=test_deets.word_length``.
        test_deets: the TestDeets case supplying the phrase and word length.

    Returns:
        A list of everything produced by iterating the handler to exhaustion.
    """
    generator = generator_class(minimum_word_length=test_deets.word_length)
    handler = AnagramHandler(test_deets.anagramee, anagram_generator=generator)
    return list(handler)


for test_deets in tests:
    all_from_hash = collect_anagrams(HashAnagramGenerator, test_deets)
    all_from_trie = collect_anagrams(UnsortedTrieAnagramGenerator, test_deets)

    # The messages are only built when an assertion fails; they name the
    # phrase and the counts so a failure is diagnosable without re-running.
    assert len(all_from_hash) == len(all_from_trie) == test_deets.expected_count, (
        'count mismatch for {!r}: hash={}, trie={}, expected={}'.format(
            test_deets.anagramee,
            len(all_from_hash),
            len(all_from_trie),
            test_deets.expected_count,
        )
    )
    # Compare sorted copies: the two generators may yield in different orders.
    assert sorted(all_from_hash) == sorted(all_from_trie), (
        'hash and trie generators disagree on the anagrams of {!r}'.format(
            test_deets.anagramee
        )
    )

## Step 2: simple performance test in a real-world-ish scenario to compare hash_anagram_generator and unsorted_trie_anagram_generator

In [3]:
import timeit

# Benchmark parameters: timeit repeats the measurement `iterations` times,
# and each measurement runs the timed statement `executions` times.
iterations = 5
executions = 25
anagramee = 'alex gessner'
word_length = 3
number_of_grams = 30


# Imports needed by every timed snippet. This is passed as timeit `setup`,
# so it is not part of the measured time.
global_shared_setup = '''
import timeit
from pagan_army.anagram_handler import AnagramHandler
from pagan_army.hash_anagram_generator import HashAnagramGenerator
from pagan_army.unsorted_trie_anagram_generator import UnsortedTrieAnagramGenerator
'''

# `!r` embeds the phrase as a proper Python string literal, so a phrase
# containing an apostrophe or backslash cannot break the generated source.
easy_test_setup = '''
ANAGRAMEE = {anagramee!r}
WORD_LENGTH = {word_length}
'''.format(anagramee=anagramee, word_length=word_length)

hashed_setup = '''
GENERATOR = HashAnagramGenerator()
'''
trie_setup = '''
GENERATOR = UnsortedTrieAnagramGenerator()
'''
run_it = '''
handler = AnagramHandler(ANAGRAMEE, anagram_generator=GENERATOR, minimum_word_length=WORD_LENGTH)
handler.get_n({})
'''.format(number_of_grams)


def time_generator(generator_setup):
    """Time one generator and return the rounded per-repeat totals.

    Args:
        generator_setup: source code that binds ``GENERATOR``.

    Returns:
        A list of ``iterations`` floats; each is the total seconds taken by
        ``executions`` runs of the timed statement, rounded to 3 decimals.

    NOTE(review): ``generator_setup`` is prepended to the *timed* statement,
    so every execution also constructs the generator and the reported times
    include that construction. If only anagram generation should be
    measured, move ``generator_setup`` into ``setup=`` instead.
    """
    timer = timeit.Timer(
        generator_setup + run_it,
        setup=global_shared_setup + easy_test_setup,
    )
    return [round(seconds, 3) for seconds in timer.repeat(iterations, executions)]


print('TEST RESULTS\n------------')
print("anagramming '{anagramee}' {count} times...".format(anagramee=anagramee, count=number_of_grams))
print("for {} iterations of {} executions".format(iterations, executions))
hashed_times = time_generator(hashed_setup)
print("the hash anagram generator times:", hashed_times)
trie_times = time_generator(trie_setup)
print("the trie anagram generator times:", trie_times)


TEST RESULTS
------------
anagramming 'alex gessner' 30 times...
for 5 iterations of 25 executions
the hash anagram generator times: [3.206, 3.182, 3.154, 3.182, 3.224]
the trie anagram generator times: [37.018, 37.001, 36.801, 37.343, 36.843]


# Thoughts
## tl;dr the hash_anagram_generator is an order of magnitude faster than an unsorted trie.
### wowza. i'm not all that surprised that the dictionary was faster than the unsorted trie. one caveat: each timed execution also builds the generator, so these numbers include construction cost and not just anagram generation. there's a lot i could still do to optimize the trie. the biggest idea i have right now is sorting the letters by their frequency in the overall word list and then having each leaf hold a list of all the words with those letters. maybe i'll do that and revisit.