The task is to build a poem generator: a second-order Markov model over words, trained on poems by Robert Frost.

In [1]:
import numpy as np
import string

In [2]:
# download the dataset (poems by Robert Frost); -nc skips the download if the file already exists
!wget -nc https://raw.githubusercontent.com/lazyprogrammer/machine_learning_examples/master/hmm_class/robert_frost.txt

File ‘robert_frost.txt’ already there; not retrieving.



In [3]:
# Containers for the three pieces of the model, filled while reading the corpus:
#   initial      - first word of a line -> how often it starts a line
#   first_order  - first word -> second words observed right after it
#   second_order - (word before previous, previous word) -> observed next
#                  words, with 'END' recorded where a line stops
initial, first_order, second_order = dict(), dict(), dict()

In [4]:
def add2dict(dic, key, value):
    """Append `value` to the list stored under `key`, creating the list on first use."""
    dic.setdefault(key, []).append(value)

In [5]:
# process the texts to fill up the dictionaries
# The punctuation-stripping table does not depend on the line, so build it once.
punctuation_table = str.maketrans('', '', string.punctuation)

# `with` closes the file once the loop finishes (or if it raises).
with open('robert_frost.txt') as corpus:
    for line in corpus:
        # lower-case, drop punctuation, split on whitespace
        tokens = line.rstrip().lower().translate(punctuation_table).split()
        T = len(tokens)
        for i in range(T):
            t = tokens[i]
            if i == 0:
                # count how often each word starts a line
                initial[t] = initial.get(t, 0.) + 1
            else:
                t_1 = tokens[i-1]
                if i == T - 1:
                    # last word of the line: the pair (previous, last) is followed by 'END'
                    add2dict(second_order, (t_1, t), 'END')
                if i == 1:
                    # second word, conditioned on the first word only
                    add2dict(first_order, t_1, t)
                else:
                    # any later word, conditioned on the two words before it
                    t_2 = tokens[i-2]
                    add2dict(second_order, (t_2, t_1), t)
# NOTE: a one-word line only updates `initial`; it adds nothing to
# `first_order` or `second_order`.

In [6]:
# turn the first-word counts into probabilities by dividing each by the grand total
initial_total = sum(initial.values())
for t in initial:
    initial[t] /= initial_total

In [7]:
# convert each list into a dictionary of probabilities
def list2pdict(tokens):
    """Return a dict mapping each distinct item of `tokens` to its relative frequency.

    Keys appear in order of first occurrence; each value is the item's
    count divided by `len(tokens)`.
    """
    total = len(tokens)
    counts = {}
    for token in tokens:
        if token in counts:
            counts[token] += 1
        else:
            counts[token] = 1.
    return {token: count / total for token, count in counts.items()}

In [8]:
# Replace each list of observed second words with a probability dictionary.
# Only entries that are still lists are converted: if this cell is re-run,
# passing an already-converted dictionary to list2pdict would count each of
# its keys once and overwrite the probabilities with a uniform distribution.
for t_1, ts in first_order.items():
    if isinstance(ts, list):
        first_order[t_1] = list2pdict(ts)

In [9]:
# Replace each list of observed next words (or 'END') with a probability dictionary.
# Only entries that are still lists are converted: if this cell is re-run,
# passing an already-converted dictionary to list2pdict would count each of
# its keys once and overwrite the probabilities with a uniform distribution.
for k, ts in second_order.items():
    if isinstance(ts, list):
        second_order[k] = list2pdict(ts)

In [10]:
# create a function to get a word
def sample_word(d):
    """Draw one key of `d` at random, weighted by its probability.

    Parameters:
        d: dict mapping each candidate to its probability. The values are
           expected to sum to (approximately) 1.

    Returns:
        The sampled key.

    Raises:
        ValueError: if `d` is empty or contains no positive probability.
    """
    p0 = np.random.random()
    cumulative_p = 0
    have_fallback = False
    fallback = None
    for t, p in d.items():
        cumulative_p += p
        if p0 < cumulative_p:
            return t
        if p > 0:
            # remember the last key that could legitimately be drawn
            have_fallback = True
            fallback = t
    # Floating-point rounding can leave the cumulative sum a hair below 1
    # (ten probabilities of 0.1 add up to 0.9999999999999999), so a draw very
    # close to 1 may fall past the last bucket. Give it to the last
    # positive-probability key instead of failing.
    if not have_fallback:
        raise ValueError("cannot sample from an empty or all-zero distribution")
    return fallback

In [11]:
# create a function to generate a poem of 4 lines
def generate_poem(n_lines=4):
    """Print a poem sampled from the fitted Markov model, one line per row.

    Parameters:
        n_lines: number of lines to generate (defaults to 4).

    Each line starts with a word drawn from `initial`, takes its second word
    from `first_order`, and then keeps drawing from `second_order` (keyed by
    the two preceding words) until 'END' is sampled.
    """
    for _ in range(n_lines):
        sentence = []
        # get initial word
        w0 = sample_word(initial)
        sentence.append(w0)
        # A word seen only on one-word lines is in `initial` but has no
        # `first_order` entry; end the line there instead of raising KeyError.
        if w0 in first_order:
            # get the second word
            w1 = sample_word(first_order[w0])
            sentence.append(w1)
            # get further transitions until 'END' value
            while True:
                w2 = sample_word(second_order[w0, w1])
                if w2 == 'END':
                    break
                sentence.append(w2)
                # slide the two-word context forward
                w0, w1 = w1, w2

        print(" ".join(sentence))

In [12]:
generate_poem()

its as you seize catscradle strings
right past both father and mother and when ive done it what was either cloud or smoke
in wentworth from now on and when i go up through the door shut
he gained no foothold but pursued


In [13]:
generate_poem()

i shall suspect
great monolithic knees the former town
you look so sitting out here if you ask me that flower
in an alder swamp


In [14]:
generate_poem()

and nothing came of all the day when it was all there was water in the cellar bones out of snow
back in rain
and politician at odd seasons
sometimes i wander out of such a wild descent
