In [None]:
import numpy as np
import tokenizer
from hmm import HiddenMarkovModel
import hmmhelper as hh
import poemgen

In [None]:
## TEST tokenizer.py
## (default sonnet text data: 'shakespeare.txt')
## =============================================

# Tokenize the default text two ways with the same tokenizer callback:
# once split into quatrain/couplet sequences, once as per-line sequences.
quatrains, couplets = tokenizer.sequence_quatrains_couplets(
    tokenizer.tokenize_lpunc
)
lines = tokenizer.sequence_each_line(
    tokenizer.tokenize_lpunc
)

# Sanity check on the per-line sequencing.
# NOTE(review): 151 * 14 presumably means 151 sonnets of 14 lines each --
# confirm against the data file the tokenizer reads.
assert len(lines) == 151 * 14, "ERROR: Incorrect sonnet lines count."

# Exercise the remaining tokenizer entry points; the results are kept in the
# notebook namespace for inspection.
quatrain_rhymes, couplet_rhymes = tokenizer.process_rhymes()
word_count = tokenizer.process_word_frequency()

In [None]:
## TEST hmm.py: train model
## =========================

# Build a model with 10 hidden states.
test_hmm = HiddenMarkovModel(10)

# Train on the couplet sequences from the tokenizer cell; `epsilon` and
# `max_iter` are the stopping conditions passed to `train`.
A, B, PI, token_dict = test_hmm.train(
    couplets,
    epsilon=0.1,
    max_iter=20,
)

In [None]:
## TEST hmmhelper.py: save/load models
## ===================================

# Round-trip the trained parameters through save/load under a test model name.
hh.save_hmm("test-10-states", A, B, PI, token_dict)
A2, B2, PI2, token_dict2 = hh.load_hmm("test-10-states")

# Compare the matrices element-wise.
# Note: `X.all() == X2.all()` is NOT an equality check -- it reduces each array
# to a single boolean ("are all entries truthy?") and compares those two
# booleans, so it passes for nearly any pair of matrices.
# `np.allclose` tolerates float round-off that serialization may introduce;
# the explicit shape comparison stops broadcasting from hiding a shape mismatch.
assert np.shape(A) == np.shape(A2) and np.allclose(A, A2), \
    "ERROR: `A` does not match."
assert np.shape(B) == np.shape(B2) and np.allclose(B, B2), \
    "ERROR: `B` does not match."
assert np.shape(PI) == np.shape(PI2) and np.allclose(PI, PI2), \
    "ERROR: `PI` does not match."
assert token_dict == token_dict2, "ERROR: `token_dict` does not match."

In [None]:
## TEST tokenizer.py: rhymes processing
## ====================================

def generate_token_dict(data):
    """Map each distinct token in `data` to a consecutive integer id.

    Args:
        data: iterable of sequences, each an iterable of hashable tokens.

    Returns:
        dict mapping token -> id, where ids start at 0 and are assigned in
        order of first appearance.
    """
    ids_by_token = {}

    for seq in data:
        for tok in seq:
            # The next free id always equals the number of tokens seen so far;
            # setdefault leaves already-registered tokens untouched.
            ids_by_token.setdefault(tok, len(ids_by_token))

    return ids_by_token

# Build a token -> id mapping over the sequences returned by
# `sequence_each_line`.
data = tokenizer.sequence_each_line(tokenizer.tokenize_lpunc)
token_dict = generate_token_dict(data)

q_rhymes, c_rhymes = tokenizer.process_rhymes()

# Both words of every rhyme entry should be known tokens. Report each entry
# whose first or second element is absent from `token_dict`. The first group
# from `process_rhymes` is checked completely before the second, and one
# consistently spelled message is used for both.
for rhyme_group in (q_rhymes, c_rhymes):
    for rhyme in rhyme_group:
        # Only membership matters here; the looked-up ids are not needed.
        if rhyme[0] not in token_dict or rhyme[1] not in token_dict:
            print("Missing Key: {}".format(rhyme))

In [None]:
## TEST poemgen.py: generate poems
## ===============================

# Generate a non-rhyming sonnet (with iambic pentameter) from the model saved
# as "test-10-states", then print it beneath a centered banner.
poem = poemgen.generate_poem("test-10-states")
print("{:=^25}".format("TEST SONNET"), poem, sep="\n")

# Generate rhyming sonnet (without meter)
#poem_rhyme = poemgen.generate_poem_rhyme("test-10-states-rhyme")
#print("{:=^25}".format("TEST SONNET"))
#print(poem_rhyme)