In [46]:

'''Example script to generate shakespeare sonnets.
At least 20 epochs are required before the generated text
starts sounding coherent.
'''


from __future__ import print_function
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
#from keras.datasets.data_utils import get_file
import numpy as np
import random
import sys
import json
import pickle
import nltk as nltk

def readSonnet(fileName):
    """
    This function reads the sonnets, lines, words in the given file.
    Input:
        Each sonnet has 17 lines: number line, 14 lines, 2 empty lines
    Output:
        sonnet_lst: the list of sonnet
        line_lst: the list of lines in all sonnets
        word_dict: the dictionary of words appears in the sonnets
    """
    with open(fileName, 'r') as f:
        lines = f.read().splitlines()
    sonnet_lst = []
    line_lst = []
    word_dict = dict()
    word_set = set()
    for i, line in enumerate(lines):
        j = i % 17
        if j == 0:
            sonnet = []
        elif (j >= 1) and (j <= 14):
            for punc in ",.?:;!()":
                line = line.replace(punc, ' ').lower()
            sonnet.append(line)
            line_lst.append(line)
            words = nltk.tokenize.word_tokenize(line)
            for word in words:
                word = word.lower()
                try:
                    word_dict[word] += 1
                    word_set.add(word)
                except KeyError:
                    word_dict[word] = 1
                    word_set.add(word)

        elif j == 15:
            sonnet_lst.append(sonnet)
    return sonnet_lst, line_lst, word_dict, word_set










In [47]:
sonnet_lst, line_lst, word_dict, word_set = readSonnet('shakespeare.txt')
print('The number of sonnets is ', len(sonnet_lst))
print('The number of lines is ', len(line_lst))
print('The number of words is ', len(word_dict))

#text = open('skp_clean.txt').read().lower()
#print ('corpus length:', len(text))
#print (text)

word = list(word_set)
print('total chars:', len(word))
#print (word)

word_indices = dict((c, i) for i, c in enumerate(word))#construct a dictionary to make it easy to vectorize the input text...
indices_word = dict((i, c) for i, c in enumerate(word))

#pickle.dump(char_indices, open("char_indic.json", "wb"))#file format control
#pickle.dump(indices_char, open("indic_char.json", "wb"))


# cut the text in semi-redundant sequences of maxlen characters

maxlen = 0
step = 1
sentences = []
next_sentence = []
for i in range(0, len(line_lst)-1, step):
    sentence_words =nltk.tokenize.word_tokenize(line_lst[i])
    sentences.append(sentence_words)
    if len(sentence_words) >maxlen:
        maxlen = len(sentence_words)
    sentence_words =nltk.tokenize.word_tokenize(line_lst[i+1])
    next_sentence.append(sentence_words)
print('nb sequences:', len(sentences))


The number of sonnets is  152
The number of lines is  2128
The number of words is  3122
total chars: 3122
nb sequences: 2127


In [49]:
#print (sentences[0:10])
#print (next_sentence[0:10])
import time
start = time.time()
print('Vectorization...')
X = np.zeros((len(sentences), maxlen, len(word)), dtype=np.bool)
y = np.zeros((len(sentences), maxlen, len(word)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    if i<len(sentences) - 1:
        sentence_next = sentences[i+1]
        for t, word_t in enumerate(sentence):
            X[i, t, word_indices[word_t]] = 1
        for t, word_t in enumerate(sentence_next):
            y[i, t, word_indices[word_t]] = 1
print (len(sentences))
print (maxlen)


# build the model: 2 stacked LSTM
print('Build model...')
print (maxlen, len(word))
model = Sequential()
#model.add(LSTM(512, return_sequences = True, input_dim=(maxlen,len(word)), input_length=len(sentences)))
model.add(LSTM(512, return_sequences=True, input_shape=(maxlen, len(word))))
model.add(Dropout(0.2))
model.add(LSTM(512, return_sequences=True))
model.add(Dropout(0.2))
model.add(Dense(len(word)))
model.add(Activation('sigmoid'))

model.compile(loss='categorical_crossentropy', optimizer='rmsprop')


def sample(a, temperature):
    a = np.exp(a)
    if np.sum(a)==0:
        return (random.randint(0, len(word)-2) )
    else:
        a /= np.sum(a)
    return np.argmax(np.random.multinomial(1, a, 1))


#def sample(a, temperature):
    # helper function to sample an index from a probability array
    #a = np.log(a) / temperature
    #a = np.exp(a) / np.sum(np.exp(a))
    
#    tem = np.sum(a[0:len(a)-1]);
#    b = a[0:len(a)-1]
#    b = np.hstack((b, tem))
    #print(a)
    #b = a
#    return np.argmax(np.random.multinomial(1, b, 1))



Vectorization...
2127
12
Build model...
12 3122


In [50]:
# train the model, output generated text after each iteration
#for iteration in range(1, 60):
for iteration in range(1, 2):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.load_weights("weights.h5")
    model.fit(X, y, batch_size=128, nb_epoch=1)

    # dump the model to disk
    json_string = model.to_json()
    open("arch.json", "w").write(json_string)
    model.save_weights("weights.h5", overwrite=True)

start_index = random.randint(0, len(line_lst) - maxlen - 1)

for diversity in [0.2,0.5]:
    print()
    print('----- diversity:', diversity)

    generated = []
    sentence = sentences[start_index]
    generated.append (sentence)
    print('----- Generating with seed: "', sentence, '"')
    #sys.stdout.write(generated)
    sonnet_len = 14;#length of the sonnet

    for i in range(sonnet_len):
        x = np.zeros((1, maxlen, len(word)))
        for t, word_t in enumerate(sentence):
            x[0, t, word_indices[word_t]] = 1.

        preds = model.predict(x, verbose=0)[0]
        #print (preds)
        #print (preds)
        sentence_now = []
        line_now =''
        #for ip in range (maxlen):
        for ip in range (8):
            prediction = preds[ip]
            #print(prediction)
            #print(len(prediction))
            #print(len(prediction[1]))
            next_index = sample(prediction, diversity)#sample function need to be redefined
            next_word = indices_word[next_index]
            sentence_now.append(next_word);
            generated.append(next_word)
#             sys.stdout.write(next_word)
            line_now = line_now + ' ' + next_word
#               print (next_word)
        sentence = sentence_now
        print (line_now,'\n')
#           sys.stdout.flush()
    print()
end = time.time()
print ('the total running time is', end - start)


--------------------------------------------------
Iteration 1
Epoch 1/1

----- diversity: 0.2
----- Generating with seed: " ['o', 'but', 'with', 'mine', 'compare', 'thou', 'thine', 'own', 'state'] "
 pity thrall else special rest fixed twenty unfair 

 deceivest building whole spent princes subsist self-love fearing 

 picture hand reeks loan pry unbred time-bettering mars 

 steep-up limits fears seem o'ertake thrust ruinate sober 

 trial gilded forbidden grecian wilfully become then intelligence 

 exchequer mourning widow towers beweep needs decree misuse 

 scandal fester contains succeeding cover given stained blind 

 warmed flattery full ill-used paying misuse enemies pardon 

 sober league pitch seen sees maiden air locked 

 dreading increase tempting vial paying shaken grace strongly 

 blood filed looks doth precious painful unbless striving 

 separable enlarged children hated be addition gain last 

 couldst so besides pitying made weakens pity lascivious 

 given age p

array([ 0.        ,  0.69314718,  1.09861229,  1.38629436])

-0.0

----- diversity: 0.2
----- Generating with seed: " ['that', 'tongue', 'that', 'tells', 'the', 'story', 'of', 'thy', 'days'] "

 that can live yet runs this unperfect things 


 like stones see delivers vice disdaineth true did 


 but life progress day hath masonry restore if 


 i mine for now subject ear give afford 


 though end seasons that prescriptions worthy life ' 


 which proudest that who confound wrackful see lambs 

 can see small more blood not writ disposed 


 is o'ercharged argument him frown'st theirs virgin blesses 


 for defaced virtuous dignified 's will fire under 

 and want and needs have be of pleasing 


 to present-absent balmy stick'st come hidden verse defendant 

 vulgar love unhappily well with time slave death 


 my harmful ashes and two brain dark ' 


 to heaven self thee me me the society 