In [None]:
import numpy as np

import hammlet
import hmm
import sonnet_helper as sh
import tokenizer

In [None]:
## tokenizer.py
## (default sonnet text data: 'data/shakespeare.txt' and 'data/spenser.txt')
## =========================================================================

# Load the raw sonnet data. Called with no arguments, so the defaults noted
# in the header above apply.
data = tokenizer.load_data()
#print(data)

# Tokenize the data line by line using the `tokenize_lpunc` tokenizer.
# `lines` is reused later as the training input in the hmm.py cell.
# NOTE(review): this prints every tokenized line; consider printing only a
# small slice once the return type has been confirmed.
lines = tokenizer.sequence_each_line(tokenizer.tokenize_lpunc, data)
print(lines)

# Same tokenizer, but sequences are grouped into quatrains and couplets
# (presumably one sequence per stanza -- confirm in tokenizer.py).
quatrains, couplets = tokenizer.sequence_quatrains_couplets(tokenizer.tokenize_lpunc, data)
#print(quatrains)
#print(couplets)

# Rhyme information, returned separately for quatrains and couplets.
# The rhymes cell below indexes each entry with [0] and [1].
quatrain_rhymes, couplet_rhymes = tokenizer.process_rhymes(data)
#print(quatrain_rhymes)
#print(couplet_rhymes)

# Word-frequency statistics over the data (exact structure not visible here).
word_count = tokenizer.process_word_frequency(data)
#print(word_count)

In [None]:
## tokenizer.py: rhymes processing
## ===============================

def generate_token_dict(training_sequences):
    """Map every distinct token to an integer id in first-seen order.

    Args:
        training_sequences: iterable of sequences, each an iterable of
            hashable tokens.

    Returns:
        dict of token -> id, where ids are 0, 1, 2, ... assigned in the
        order the tokens are first encountered.
    """
    ids_by_token = {}

    for seq in training_sequences:
        for tok in seq:
            # The current size is the next unused id; setdefault leaves
            # already-known tokens untouched.
            ids_by_token.setdefault(tok, len(ids_by_token))

    return ids_by_token

# Build the token dictionary from the line-tokenized data, then verify that
# both words of every rhyme entry appear in it.
data = tokenizer.load_data()
training_sequences = tokenizer.sequence_each_line(tokenizer.tokenize_lpunc, data)
token_dict = generate_token_dict(training_sequences)

q_rhymes, c_rhymes = tokenizer.process_rhymes(data)

# Quatrain and couplet rhymes get the identical check, so run one loop over
# both collections. An entry is reported (once) when either of its first two
# elements is not a known token.
for rhyme_pairs in (q_rhymes, c_rhymes):
    for pair in rhyme_pairs:
        if pair[0] not in token_dict or pair[1] not in token_dict:
            print("Missing Key: {}".format(pair))

In [None]:
## hmm.py: train/save/load model
## =============================

# Specify number of states
hmm_model = hmm.HiddenMarkovModel(10)
# Specify stopping conditions
# (`lines` comes from the tokenizer cell above.)
A, B, PI, token_dict = hmm_model.train(lines, epsilon=0.1, max_iter=20)

# Save the trained parameters and immediately load them back under the same
# name so the round trip can be verified.
model_name = "HMM-10S-01E-20X"
hmm.save_hmm_model(model_name, A, B, PI, token_dict)
A2, B2, PI2, token_dict2 = hmm.load_hmm_model(model_name)

# Compare the arrays element-wise. The previous `X.all() == X2.all()` only
# compared two scalar booleans ("are all entries truthy?"), so it passed for
# almost any pair of matrices. assert_allclose checks shape and values and
# raises AssertionError with the given message on mismatch.
np.testing.assert_allclose(A2, A, err_msg="ERROR: `A` does not match.")
np.testing.assert_allclose(B2, B, err_msg="ERROR: `B` does not match.")
np.testing.assert_allclose(PI2, PI, err_msg="ERROR: `PI` does not match.")
assert token_dict == token_dict2, "ERROR: `token_dict` does not match."

In [None]:
## hammlet.py: generate sonnets
## ============================

# Alternative entry point, left disabled:
#sonnet = hammlet.generate_sonnet(model_type="HMM", model_name="HMM-10S-01E-20X", rhyme=True)
# Generate a sonnet from the model named "HMM-10S-01E-20X" (the same name the
# hmm.py cell saves under) with rhyme=True, then print the result.
# Presumably the model is loaded from disk by name -- confirm in hammlet.py.
sonnet = hammlet.generate_hmm_sonnet(model="HMM-10S-01E-20X", rhyme=True)
print(sonnet)