# Week 1 - Lesson 1

In [0]:
from tensorflow.keras.preprocessing.text import Tokenizer

# Three toy sentences that differ in case and punctuation.
sentences = ['i love my dog', 'I, love my cat', 'You love my dog!']

# Fit a tokenizer on the sentences; num_words caps the vocabulary it will use
# when encoding text later on.
tokenizer = Tokenizer(num_words=100)
tokenizer.fit_on_texts(sentences)

# word -> integer index mapping learned from the sentences above.
word_index = tokenizer.word_index
print(word_index)

# Week 1 - Lesson 2

In [0]:
import tensorflow as tf
from tensorflow import keras


from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Toy corpus used to fit the tokenizer.
sentences = [
    'I love my dog', 'I love my cat', 'You love my dog!',
    'Do you think my dog is amazing?',
]

# oov_token is the placeholder used for words that were not seen during fitting.
tokenizer = Tokenizer(num_words=100, oov_token="<OOV>")
tokenizer.fit_on_texts(sentences)
word_index = tokenizer.word_index

# Encode the sentences as integer sequences and bring them all to length 5.
sequences = tokenizer.texts_to_sequences(sentences)
padded = pad_sequences(sequences, maxlen=5)

for title, value in (("\nWord Index = ", word_index), ("\nSequences = ", sequences)):
    print(title, value)
print("\nPadded Sequences:")
print(padded)


# Try with words that the tokenizer wasn't fit to
test_data = ['i really love my dog', 'my dog loves my manatee']

test_seq = tokenizer.texts_to_sequences(test_data)
print("\nTest Sequence = ", test_seq)

# Note: `padded` is rebound here to the padded test sequences.
padded = pad_sequences(test_seq, maxlen=10)
print("\nPadded Test Sequence: ")
print(padded)

# Week 1 - Lesson 3

In [0]:
!wget --no-check-certificate \
    https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sarcasm.json \
    -O /tmp/sarcasm.json
  
import json

# Parse the downloaded sarcasm dataset; it is iterated below as a sequence of records.
with open("/tmp/sarcasm.json", 'r') as f:
    datastore = json.load(f)

# Pull the three fields of every record into parallel lists.
sentences = [item['headline'] for item in datastore]
labels = [item['is_sarcastic'] for item in datastore]
urls = [item['article_link'] for item in datastore]



from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
# Fit a tokenizer (with an out-of-vocabulary placeholder) on every headline.
tokenizer = Tokenizer(oov_token="<OOV>")
tokenizer.fit_on_texts(sentences)
word_index = tokenizer.word_index

# Report the vocabulary size, then the full word -> index mapping.
print(len(word_index))
print(word_index)

# Encode the headlines and pad them at the end ('post') to a common length.
sequences = tokenizer.texts_to_sequences(sentences)
padded = pad_sequences(sequences, padding='post')

# Show the first padded headline and the overall array shape.
print(padded[0])
print(padded.shape)

# Week 2 Lesson 1

In [0]:



# NOTE: PLEASE MAKE SURE YOU ARE RUNNING THIS IN A PYTHON3 ENVIRONMENT

import tensorflow as tf
print(tf.__version__)

# Eager execution is needed for the loops below that iterate over the dataset
# and call .numpy() on its tensors.
# TF 2.x is eager by default and no longer exposes the top-level
# tf.enable_eager_execution(), so only switch it on when it is not active yet.
#!pip install tensorflow==2.0.0-beta0


if not tf.executing_eagerly():
    tf.compat.v1.enable_eager_execution()

# !pip install -q tensorflow-datasets



import tensorflow_datasets as tfds
imdb, info = tfds.load("imdb_reviews", with_info=True, as_supervised=True)



import numpy as np

train_data, test_data = imdb['train'], imdb['test']

training_sentences = []
training_labels = []

testing_sentences = []
testing_labels = []

# In Python 3, s.numpy() is expected to be a bytes object here. Decode it to
# text instead of wrapping it in str(), which would keep the b'...' repr
# (prefix, quotes and escape sequences) inside every review.
for s, l in train_data:
    training_sentences.append(s.numpy().decode('utf8'))
    training_labels.append(l.numpy())

for s, l in test_data:
    testing_sentences.append(s.numpy().decode('utf8'))
    testing_labels.append(l.numpy())

# Keras is fed NumPy arrays rather than Python lists of scalars.
training_labels_final = np.array(training_labels)
testing_labels_final = np.array(testing_labels)



# Settings consumed by the tokenizer, the padding step and the model below.
vocab_size, embedding_dim, max_length = 10000, 16, 120
trunc_type = 'post'   # passed as `truncating` to pad_sequences
oov_tok = "<OOV>"     # placeholder token for out-of-vocabulary words


from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Fit the vocabulary on the training reviews only, then encode and pad them.
tokenizer = Tokenizer(num_words = vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(training_sentences)
padded = pad_sequences(sequences,maxlen=max_length, truncating=trunc_type)

# Encode the test reviews with the same tokenizer and truncate them the same
# way as the training reviews; without `truncating` pad_sequences would cut
# the opposite end of over-long reviews, so train and test inputs would differ.
testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
testing_padded = pad_sequences(testing_sequences,maxlen=max_length, truncating=trunc_type)



reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])

def decode_review(text):
    """Turn a sequence of word indices back into a space-separated string.

    Indices missing from the module-level ``reverse_word_index`` mapping are
    rendered as '?'.
    """
    words = []
    for token_id in text:
        words.append(reverse_word_index.get(token_id, '?'))
    return ' '.join(words)

print(decode_review(padded[1]))
print(training_sentences[1])



# Embedding -> Flatten -> small dense head ending in a single sigmoid unit.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(6, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()



# Train on the padded training reviews, validating on the padded test reviews.
num_epochs = 10
model.fit(
    padded,
    training_labels_final,
    epochs=num_epochs,
    validation_data=(testing_padded, testing_labels_final),
)



e = model.layers[0]
weights = e.get_weights()[0]
print(weights.shape) # shape: (vocab_size, embedding_dim)



import io

# Export the embedding matrix for external visualisation:
#   vecs.tsv - one tab-separated embedding vector per line
#   meta.tsv - the word belonging to the vector on the same line
# Index 0 is skipped (the loop has always started at 1), and the upper bound is
# clamped so a vocabulary smaller than vocab_size cannot raise a KeyError.
last_word_num = min(vocab_size, len(reverse_word_index) + 1)
# `with` guarantees both files are closed even if a lookup or write fails.
with io.open('vecs.tsv', 'w', encoding='utf-8') as out_v, \
     io.open('meta.tsv', 'w', encoding='utf-8') as out_m:
  for word_num in range(1, last_word_num):
    word = reverse_word_index[word_num]
    embeddings = weights[word_num]
    out_m.write(word + "\n")
    out_v.write('\t'.join([str(x) for x in embeddings]) + "\n")



try:
  from google.colab import files
except ImportError:
  pass
else:
  files.download('vecs.tsv')
  files.download('meta.tsv')



sentence = "I really think this is amazing. honest."
# texts_to_sequences expects a list of texts; a bare string would be iterated
# character by character, so wrap the sentence in a list.
sequence = tokenizer.texts_to_sequences([sentence])
print(sequence)



# Week 2 - Lesson 2

In [0]:



import json
import tensorflow as tf

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences



# Settings consumed by the tokenizer, the padding step, the split and the model.
vocab_size, embedding_dim, max_length = 10000, 16, 100
trunc_type = padding_type = 'post'   # `truncating` / `padding` for pad_sequences
oov_tok = "<OOV>"                    # placeholder token for unseen words
training_size = 20000                # number of leading examples used for training



!wget --no-check-certificate \
    https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sarcasm.json \
    -O /tmp/sarcasm.json



# Parse the downloaded sarcasm dataset; it is iterated below as a sequence of records.
with open("/tmp/sarcasm.json", 'r') as f:
    datastore = json.load(f)

# Parallel lists of headlines and their sarcasm labels.
sentences = [item['headline'] for item in datastore]
labels = [item['is_sarcastic'] for item in datastore]

# The first `training_size` examples are used for training, the rest for testing.
training_sentences, testing_sentences = sentences[:training_size], sentences[training_size:]
training_labels, testing_labels = labels[:training_size], labels[training_size:]



# The vocabulary is learned from the training headlines only.
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index

# Encode both splits with that tokenizer, then pad/truncate them identically.
training_sequences = tokenizer.texts_to_sequences(training_sentences)
training_padded = pad_sequences(
    training_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type
)

testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
testing_padded = pad_sequences(
    testing_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type
)



import numpy as np  # local import: this lesson's import cell does not bring in numpy

# Embedding -> average over the sequence -> dense head with a sigmoid output.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(24, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])



model.summary()



num_epochs = 30
# The labels are plain Python lists; TF 2.x releases can reject a NumPy `x`
# combined with a list `y`, so convert the labels to arrays at the call site.
history = model.fit(
    training_padded,
    np.array(training_labels),
    epochs=num_epochs,
    validation_data=(testing_padded, np.array(testing_labels)),
    verbose=2,
)



import matplotlib.pyplot as plt


def plot_graphs(history, string):
  """Plot one training metric and its validation counterpart per epoch.

  Args:
    history: object whose ``history`` attribute maps metric names to
      per-epoch values (e.g. what ``model.fit`` returns).
    string: metric name; ``'val_' + string`` is drawn alongside it.
  """
  val_string = 'val_' + string
  for series in (string, val_string):
    plt.plot(history.history[series])
  plt.xlabel("Epochs")
  plt.ylabel(string)
  plt.legend([string, val_string])
  plt.show()
  
plot_graphs(history, "accuracy")
plot_graphs(history, "loss")



reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])

def decode_sentence(text):
    """Turn a sequence of word indices back into a space-separated string.

    Indices missing from the module-level ``reverse_word_index`` mapping are
    rendered as '?'.
    """
    words = []
    for token_id in text:
        words.append(reverse_word_index.get(token_id, '?'))
    return ' '.join(words)

# Show the decoded padded sequence next to the raw headline and label of the
# SAME example, so the three printed lines can be compared with each other
# (previously example 0 was decoded while example 2 was printed).
sample_index = 2
print(decode_sentence(training_padded[sample_index]))
print(training_sentences[sample_index])
print(labels[sample_index])



e = model.layers[0]
weights = e.get_weights()[0]
print(weights.shape) # shape: (vocab_size, embedding_dim)



import io

# Export the embedding matrix for external visualisation:
#   vecs.tsv - one tab-separated embedding vector per line
#   meta.tsv - the word belonging to the vector on the same line
# Index 0 is skipped (the loop has always started at 1), and the upper bound is
# clamped so a vocabulary smaller than vocab_size cannot raise a KeyError.
last_word_num = min(vocab_size, len(reverse_word_index) + 1)
# `with` guarantees both files are closed even if a lookup or write fails.
with io.open('vecs.tsv', 'w', encoding='utf-8') as out_v, \
     io.open('meta.tsv', 'w', encoding='utf-8') as out_m:
  for word_num in range(1, last_word_num):
    word = reverse_word_index[word_num]
    embeddings = weights[word_num]
    out_m.write(word + "\n")
    out_v.write('\t'.join([str(x) for x in embeddings]) + "\n")



try:
  from google.colab import files
except ImportError:
  pass
else:
  files.download('vecs.tsv')
  files.download('meta.tsv')



sentence = ["granny starting to fear spiders in the garden might be real", "game of thrones season finale showing this sunday night"]
sequences = tokenizer.texts_to_sequences(sentence)
padded = pad_sequences(sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)
print(model.predict(padded))



# Week 2 Lesson 3

In [0]:



# NOTE: PLEASE MAKE SURE YOU ARE RUNNING THIS IN A PYTHON3 ENVIRONMENT

import tensorflow as tf
print(tf.__version__)



# Uncomment and run this if you don't have TensorFlow 2.0x [Check for latest 2.0 instructions at https://www.tensorflow.org/versions/r2.0/api_docs/python/tf]
#!pip install tensorflow==2.0.0-beta0



# Double check TF 2.0x is installed. If you ran the above block, there was a 
# 'reset all runtimes' button at the bottom that you needed to press
import tensorflow as tf
print(tf.__version__)



# If the import fails, run this
# !pip install -q tensorflow-datasets

import tensorflow_datasets as tfds
imdb, info = tfds.load("imdb_reviews/subwords8k", with_info=True, as_supervised=True)



train_data, test_data = imdb['train'], imdb['test']



tokenizer = info.features['text'].encoder



print(tokenizer.subwords)



sample_string = 'TensorFlow, from basics to mastery'

tokenized_string = tokenizer.encode(sample_string)
print ('Tokenized string is {}'.format(tokenized_string))

original_string = tokenizer.decode(tokenized_string)
print ('The original string: {}'.format(original_string))



for ts in tokenized_string:
  print ('{} ----> {}'.format(ts, tokenizer.decode([ts])))



embedding_dim = 64

# Embedding sized to the subword vocabulary -> average pooling -> dense head
# with a single sigmoid output.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(tokenizer.vocab_size, embedding_dim))
model.add(tf.keras.layers.GlobalAveragePooling1D())
model.add(tf.keras.layers.Dense(6, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

model.summary()



num_epochs = 10

model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])

# train_data / test_data are used exactly as loaded, i.e. unbatched. The
# encoded reviews presumably differ in length, so shuffle and pad them into
# batches (the same preparation the Week 3 lessons apply to this dataset)
# before handing them to fit().
BUFFER_SIZE = 10000
BATCH_SIZE = 64
train_batches = train_data.shuffle(BUFFER_SIZE).padded_batch(
    BATCH_SIZE, tf.compat.v1.data.get_output_shapes(train_data))
test_batches = test_data.padded_batch(
    BATCH_SIZE, tf.compat.v1.data.get_output_shapes(test_data))

history = model.fit(train_batches, epochs=num_epochs, validation_data=test_batches)



import matplotlib.pyplot as plt


def plot_graphs(history, string):
  """Plot one training metric and its validation counterpart per epoch.

  Args:
    history: object whose ``history`` attribute maps metric names to
      per-epoch values (e.g. what ``model.fit`` returns).
    string: metric name; ``'val_' + string`` is drawn alongside it.
  """
  val_string = 'val_' + string
  for series in (string, val_string):
    plt.plot(history.history[series])
  plt.xlabel("Epochs")
  plt.ylabel(string)
  plt.legend([string, val_string])
  plt.show()
  
plot_graphs(history, "accuracy")
plot_graphs(history, "loss")



e = model.layers[0]
weights = e.get_weights()[0]
print(weights.shape) # shape: (vocab_size, embedding_dim)

import io

# Export the embedding matrix for external visualisation:
#   vecs.tsv - one tab-separated embedding vector per line
#   meta.tsv - the decoded subword belonging to the vector on the same line
# `with` guarantees both files are closed even if decoding or a write fails.
with io.open('vecs.tsv', 'w', encoding='utf-8') as out_v, \
     io.open('meta.tsv', 'w', encoding='utf-8') as out_m:
  for word_num in range(1, tokenizer.vocab_size):
    word = tokenizer.decode([word_num])
    embeddings = weights[word_num]
    out_m.write(word + "\n")
    out_v.write('\t'.join([str(x) for x in embeddings]) + "\n")


try:
  from google.colab import files
except ImportError:
  pass
else:
  files.download('vecs.tsv')
  files.download('meta.tsv')



# Single Layer LSTM: Week 3 lesson 1

In [0]:


from __future__ import absolute_import, division, print_function, unicode_literals


import tensorflow_datasets as tfds
import tensorflow as tf
print(tf.__version__)



# If the tf.__version__ is 1.x, please run this cell
#!pip install tensorflow==2.0.0-beta0



import tensorflow_datasets as tfds
import tensorflow as tf
print(tf.__version__)



# Get the data
dataset, info = tfds.load('imdb_reviews/subwords8k', with_info=True, as_supervised=True)
train_dataset, test_dataset = dataset['train'], dataset['test']



tokenizer = info.features['text'].encoder



BUFFER_SIZE = 10000
BATCH_SIZE = 64

# Shuffle the training split, then pad both splits into batches.
# The Dataset.output_shapes property is deprecated (and missing on TF 2.x
# datasets); tf.compat.v1.data.get_output_shapes(dataset) is its replacement.
train_dataset = train_dataset.shuffle(BUFFER_SIZE)
train_dataset = train_dataset.padded_batch(
    BATCH_SIZE, tf.compat.v1.data.get_output_shapes(train_dataset))
test_dataset = test_dataset.padded_batch(
    BATCH_SIZE, tf.compat.v1.data.get_output_shapes(test_dataset))



# Embedding -> one bidirectional LSTM -> dense head with a sigmoid output.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(tokenizer.vocab_size, 64))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))



model.summary()



model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])



# Train on the batched training set, validating on the batched test set.
NUM_EPOCHS = 10
history = model.fit(
    train_dataset,
    epochs=NUM_EPOCHS,
    validation_data=test_dataset,
)



import matplotlib.pyplot as plt


def plot_graphs(history, string):
  """Plot one training metric and its validation counterpart per epoch.

  Args:
    history: object whose ``history`` attribute maps metric names to
      per-epoch values (e.g. what ``model.fit`` returns).
    string: metric name; ``'val_' + string`` is drawn alongside it.
  """
  val_string = 'val_' + string
  for series in (string, val_string):
    plt.plot(history.history[series])
  plt.xlabel("Epochs")
  plt.ylabel(string)
  plt.legend([string, val_string])
  plt.show()



plot_graphs(history, 'accuracy')



plot_graphs(history, 'loss')



# Multiple Layer LSTM - Week 3 Lesson 2

In [0]:



from __future__ import absolute_import, division, print_function, unicode_literals


import tensorflow_datasets as tfds
import tensorflow as tf
print(tf.__version__)



# If the tf.__version__ is 1.x, please uncomment and run this cell.
# It is left commented out so a full run does not unconditionally install
# a pinned beta over the TensorFlow that has already been imported above.
#!pip install tensorflow==2.0.0-beta0



import tensorflow_datasets as tfds
import tensorflow as tf
print(tf.__version__)



# Get the data
dataset, info = tfds.load('imdb_reviews/subwords8k', with_info=True, as_supervised=True)
train_dataset, test_dataset = dataset['train'], dataset['test']



tokenizer = info.features['text'].encoder



BUFFER_SIZE = 10000
BATCH_SIZE = 64

# Shuffle the training split, then pad both splits into batches.
# The Dataset.output_shapes property is deprecated (and missing on TF 2.x
# datasets); tf.compat.v1.data.get_output_shapes(dataset) is its replacement.
train_dataset = train_dataset.shuffle(BUFFER_SIZE)
train_dataset = train_dataset.padded_batch(
    BATCH_SIZE, tf.compat.v1.data.get_output_shapes(train_dataset))
test_dataset = test_dataset.padded_batch(
    BATCH_SIZE, tf.compat.v1.data.get_output_shapes(test_dataset))



# Embedding -> two stacked bidirectional LSTMs -> dense head with sigmoid output.
# The first LSTM uses return_sequences=True so the second one receives a sequence.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(tokenizer.vocab_size, 64))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))



model.summary()



model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])



# Train on the batched training set, validating on the batched test set.
NUM_EPOCHS = 10
history = model.fit(
    train_dataset,
    epochs=NUM_EPOCHS,
    validation_data=test_dataset,
)



import matplotlib.pyplot as plt


def plot_graphs(history, string):
  """Plot one training metric and its validation counterpart per epoch.

  Args:
    history: object whose ``history`` attribute maps metric names to
      per-epoch values (e.g. what ``model.fit`` returns).
    string: metric name; ``'val_' + string`` is drawn alongside it.
  """
  val_string = 'val_' + string
  for series in (string, val_string):
    plt.plot(history.history[series])
  plt.xlabel("Epochs")
  plt.ylabel(string)
  plt.legend([string, val_string])
  plt.show()



plot_graphs(history, 'accuracy')



plot_graphs(history, 'loss')



# Conv1D (convolutional) model: Week 3 lesson 3

In [0]:



from __future__ import absolute_import, division, print_function, unicode_literals


import tensorflow_datasets as tfds
import tensorflow as tf
print(tf.__version__)



# If the tf.__version__ is 1.x, please uncomment and run this cell.
# It is left commented out so a full run does not unconditionally install
# a pinned beta over the TensorFlow that has already been imported above.
#!pip install tensorflow==2.0.0-beta0



import tensorflow_datasets as tfds
import tensorflow as tf
print(tf.__version__)






# Get the data
dataset, info = tfds.load('imdb_reviews/subwords8k', with_info=True, as_supervised=True)
train_dataset, test_dataset = dataset['train'], dataset['test']



tokenizer = info.features['text'].encoder



BUFFER_SIZE = 10000
BATCH_SIZE = 64

# Shuffle the training split, then pad both splits into batches.
# The Dataset.output_shapes property is deprecated (and missing on TF 2.x
# datasets); tf.compat.v1.data.get_output_shapes(dataset) is its replacement.
train_dataset = train_dataset.shuffle(BUFFER_SIZE)
train_dataset = train_dataset.padded_batch(
    BATCH_SIZE, tf.compat.v1.data.get_output_shapes(train_dataset))
test_dataset = test_dataset.padded_batch(
    BATCH_SIZE, tf.compat.v1.data.get_output_shapes(test_dataset))



# Embedding -> 1-D convolution (128 filters, width 5) -> average pooling ->
# dense head with a sigmoid output.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(tokenizer.vocab_size, 64))
model.add(tf.keras.layers.Conv1D(128, 5, activation='relu'))
model.add(tf.keras.layers.GlobalAveragePooling1D())
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))



model.summary()



model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])



# Train on the batched training set, validating on the batched test set.
NUM_EPOCHS = 10
history = model.fit(
    train_dataset,
    epochs=NUM_EPOCHS,
    validation_data=test_dataset,
)



import matplotlib.pyplot as plt


def plot_graphs(history, string):
  """Plot one training metric and its validation counterpart per epoch.

  Args:
    history: object whose ``history`` attribute maps metric names to
      per-epoch values (e.g. what ``model.fit`` returns).
    string: metric name; ``'val_' + string`` is drawn alongside it.
  """
  val_string = 'val_' + string
  for series in (string, val_string):
    plt.plot(history.history[series])
  plt.xlabel("Epochs")
  plt.ylabel(string)
  plt.legend([string, val_string])
  plt.show()



plot_graphs(history, 'accuracy')



plot_graphs(history, 'loss')



# Week 3 Lesson 4

In [0]:



import json
import tensorflow as tf

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

!wget --no-check-certificate \
    https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sarcasm.json \
    -O /tmp/sarcasm.json

# Settings consumed by the tokenizer, the padding step, the split and the model.
vocab_size, embedding_dim, max_length = 1000, 16, 120
trunc_type = padding_type = 'post'   # `truncating` / `padding` for pad_sequences
oov_tok = "<OOV>"                    # placeholder token for unseen words
training_size = 20000                # number of leading examples used for training


# Parse the downloaded sarcasm dataset; it is iterated below as a sequence of records.
with open("/tmp/sarcasm.json", 'r') as f:
    datastore = json.load(f)


# Parallel lists of headlines and labels; `urls` is created but left empty here.
sentences = [item['headline'] for item in datastore]
labels = [item['is_sarcastic'] for item in datastore]
urls = []

# The first `training_size` examples are used for training, the rest for testing.
training_sentences, testing_sentences = sentences[:training_size], sentences[training_size:]
training_labels, testing_labels = labels[:training_size], labels[training_size:]

# The vocabulary is learned from the training headlines only.
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index

# Encode both splits with that tokenizer, then pad/truncate them identically.
training_sequences = tokenizer.texts_to_sequences(training_sentences)
training_padded = pad_sequences(
    training_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type
)

testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
testing_padded = pad_sequences(
    testing_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type
)

import numpy as np  # local import: this lesson's import cell does not bring in numpy

# Embedding -> bidirectional LSTM -> dense head with a sigmoid output.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    tf.keras.layers.Dense(24, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
model.summary()

num_epochs = 50
# The labels are plain Python lists; TF 2.x releases can reject a NumPy `x`
# combined with a list `y`, so convert the labels to arrays at the call site.
history = model.fit(
    training_padded,
    np.array(training_labels),
    epochs=num_epochs,
    validation_data=(testing_padded, np.array(testing_labels)),
    verbose=1,
)



import matplotlib.pyplot as plt


def plot_graphs(history, string):
  """Plot one training metric and its validation counterpart per epoch.

  Args:
    history: object whose ``history`` attribute maps metric names to
      per-epoch values (e.g. what ``model.fit`` returns).
    string: metric name; ``'val_' + string`` is drawn alongside it.
  """
  val_string = 'val_' + string
  for series in (string, val_string):
    plt.plot(history.history[series])
  plt.xlabel("Epochs")
  plt.ylabel(string)
  plt.legend([string, val_string])
  plt.show()

# With metrics=['accuracy'] the history key is 'accuracy' on TF 2.x but 'acc'
# on TF 1.x; use whichever key this training run actually recorded instead of
# hard-coding 'acc' (a KeyError on TF 2.x).
acc_key = 'accuracy' if 'accuracy' in history.history else 'acc'
plot_graphs(history, acc_key)
plot_graphs(history, 'loss')



model.save("test.h5")



# Week 3 lesson 5

In [0]:



import json
import tensorflow as tf

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

!wget --no-check-certificate \
    https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sarcasm.json \
    -O /tmp/sarcasm.json

# Settings consumed by the tokenizer, the padding step, the split and the model.
vocab_size, embedding_dim, max_length = 1000, 16, 120
trunc_type = padding_type = 'post'   # `truncating` / `padding` for pad_sequences
oov_tok = "<OOV>"                    # placeholder token for unseen words
training_size = 20000                # number of leading examples used for training


# Parse the downloaded sarcasm dataset; it is iterated below as a sequence of records.
with open("/tmp/sarcasm.json", 'r') as f:
    datastore = json.load(f)


# Parallel lists of headlines and labels; `urls` is created but left empty here.
sentences = [item['headline'] for item in datastore]
labels = [item['is_sarcastic'] for item in datastore]
urls = []

# The first `training_size` examples are used for training, the rest for testing.
training_sentences, testing_sentences = sentences[:training_size], sentences[training_size:]
training_labels, testing_labels = labels[:training_size], labels[training_size:]

# The vocabulary is learned from the training headlines only.
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index

# Encode both splits with that tokenizer, then pad/truncate them identically.
training_sequences = tokenizer.texts_to_sequences(training_sentences)
training_padded = pad_sequences(
    training_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type
)

testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
testing_padded = pad_sequences(
    testing_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type
)

import numpy as np  # local import: this lesson's import cell does not bring in numpy

# Embedding -> 1-D convolution (128 filters, width 5) -> max pooling ->
# dense head with a sigmoid output.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),
    tf.keras.layers.Conv1D(128, 5, activation='relu'),
    tf.keras.layers.GlobalMaxPooling1D(),
    tf.keras.layers.Dense(24, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
model.summary()

num_epochs = 50
# The labels are plain Python lists; TF 2.x releases can reject a NumPy `x`
# combined with a list `y`, so convert the labels to arrays at the call site.
history = model.fit(
    training_padded,
    np.array(training_labels),
    epochs=num_epochs,
    validation_data=(testing_padded, np.array(testing_labels)),
    verbose=1,
)



import matplotlib.pyplot as plt


def plot_graphs(history, string):
  """Plot one training metric and its validation counterpart per epoch.

  Args:
    history: object whose ``history`` attribute maps metric names to
      per-epoch values (e.g. what ``model.fit`` returns).
    string: metric name; ``'val_' + string`` is drawn alongside it.
  """
  val_string = 'val_' + string
  for series in (string, val_string):
    plt.plot(history.history[series])
  plt.xlabel("Epochs")
  plt.ylabel(string)
  plt.legend([string, val_string])
  plt.show()

# With metrics=['accuracy'] the history key is 'accuracy' on TF 2.x but 'acc'
# on TF 1.x; use whichever key this training run actually recorded instead of
# hard-coding 'acc' (a KeyError on TF 2.x).
acc_key = 'accuracy' if 'accuracy' in history.history else 'acc'
plot_graphs(history, acc_key)
plot_graphs(history, 'loss')



model.save("test.h5")



# Week 3 lesson 6

In [0]:



# NOTE: PLEASE MAKE SURE YOU ARE RUNNING THIS IN A PYTHON3 ENVIRONMENT

import tensorflow as tf
print(tf.__version__)

# Eager execution is needed for the loops below that iterate over the dataset
# and call .numpy() on its tensors.
# TF 2.x is eager by default and no longer exposes the top-level
# tf.enable_eager_execution(), so only switch it on when it is not active yet.
#!pip install tensorflow==2.0.0-beta0


if not tf.executing_eagerly():
    tf.compat.v1.enable_eager_execution()

# !pip install -q tensorflow-datasets



import tensorflow_datasets as tfds
imdb, info = tfds.load("imdb_reviews", with_info=True, as_supervised=True)



import numpy as np

train_data, test_data = imdb['train'], imdb['test']

training_sentences = []
training_labels = []

testing_sentences = []
testing_labels = []

# In Python 3, s.numpy() is expected to be a bytes object here. Decode it to
# text instead of wrapping it in str(), which would keep the b'...' repr
# (prefix, quotes and escape sequences) inside every review.
for s, l in train_data:
    training_sentences.append(s.numpy().decode('utf8'))
    training_labels.append(l.numpy())

for s, l in test_data:
    testing_sentences.append(s.numpy().decode('utf8'))
    testing_labels.append(l.numpy())

# Keras is fed NumPy arrays rather than Python lists of scalars.
training_labels_final = np.array(training_labels)
testing_labels_final = np.array(testing_labels)



# Settings consumed by the tokenizer, the padding step and the models below.
vocab_size, embedding_dim, max_length = 10000, 16, 120
trunc_type = 'post'   # passed as `truncating` to pad_sequences
oov_tok = "<OOV>"     # placeholder token for out-of-vocabulary words


from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Fit the vocabulary on the training reviews only, then encode and pad them.
tokenizer = Tokenizer(num_words = vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(training_sentences)
padded = pad_sequences(sequences,maxlen=max_length, truncating=trunc_type)

# Encode the test reviews with the same tokenizer and truncate them the same
# way as the training reviews; without `truncating` pad_sequences would cut
# the opposite end of over-long reviews, so train and test inputs would differ.
testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
testing_padded = pad_sequences(testing_sequences,maxlen=max_length, truncating=trunc_type)



reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])

def decode_review(text):
    """Turn a sequence of word indices back into a space-separated string.

    Indices missing from the module-level ``reverse_word_index`` mapping are
    rendered as '?'.
    """
    words = []
    for token_id in text:
        words.append(reverse_word_index.get(token_id, '?'))
    return ' '.join(words)

print(decode_review(padded[1]))
print(training_sentences[1])



# Embedding -> bidirectional GRU -> small dense head with a sigmoid output.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.GRU(32)))
model.add(tf.keras.layers.Dense(6, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()



# Train on the padded training reviews, validating on the padded test reviews.
num_epochs = 50
history = model.fit(
    padded,
    training_labels_final,
    epochs=num_epochs,
    validation_data=(testing_padded, testing_labels_final),
)



import matplotlib.pyplot as plt


def plot_graphs(history, string):
  """Plot one training metric and its validation counterpart per epoch.

  Args:
    history: object whose ``history`` attribute maps metric names to
      per-epoch values (e.g. what ``model.fit`` returns).
    string: metric name; ``'val_' + string`` is drawn alongside it.
  """
  val_string = 'val_' + string
  for series in (string, val_string):
    plt.plot(history.history[series])
  plt.xlabel("Epochs")
  plt.ylabel(string)
  plt.legend([string, val_string])
  plt.show()

plot_graphs(history, 'accuracy')
plot_graphs(history, 'loss')



# Model Definition with LSTM
# Same embedding and dense head as above, with a bidirectional LSTM in between.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)))
model.add(tf.keras.layers.Dense(6, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()



# Model Definition with Conv1D
# Note: this rebinds `model`, replacing the LSTM variant defined just above.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length))
model.add(tf.keras.layers.Conv1D(128, 5, activation='relu'))
model.add(tf.keras.layers.GlobalAveragePooling1D())
model.add(tf.keras.layers.Dense(6, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()



# Week 4 lesson 1

In [0]:



import tensorflow as tf

from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
import numpy as np



tokenizer = Tokenizer()

# Lyrics used as the training corpus; lines are separated by "\n" escapes.
# The literal is written as two adjacent string literals inside parentheses:
# a double-quoted string cannot contain a raw line break, so the previous
# two-line form did not parse. The resulting text is unchanged.
data = (
    "In the town of Athy one Jeremy Lanigan \n Battered away til he hadnt a pound. \nHis father died and made him a man again \n Left him a farm and ten acres of ground. \nHe gave a grand party for friends and relations \nWho didnt forget him when come to the wall, \nAnd if youll but listen Ill make your eyes glisten \nOf the rows and the ructions of Lanigans Ball. \nMyself to be sure got free invitation, \nFor all the nice girls and boys I might ask, \nAnd just in a minute both friends and relations \nWere dancing round merry as bees round a cask. \nJudy ODaly, that nice little milliner, \nShe tipped me a wink for to give her a call, \nAnd I soon arrived with Peggy McGilligan \nJust in time for Lanigans Ball. \nThere were lashings of punch and wine for the ladies, \nPotatoes and cakes; there was bacon and tea, \nThere were the Nolans, Dolans, OGradys \nCourting the girls and dancing away. \nSongs they went round as plenty as water, \nThe harp that once sounded in Taras old hall,\nSweet Nelly Gray and The Rat Catchers Daughter,\nAll singing together at Lanigans Ball. \nThey were doing all kinds of nonsensical polkas \nAll round the room in a whirligig. \nJulia and I, we banished their nonsense \nAnd tipped them the twist of a reel and a jig. \nAch mavrone, how the girls got all mad at me \nDanced til youd think the ceiling would fall. \nFor I spent three weeks at Brooks Academy \nLearning new steps for Lanigans Ball. \nThree long weeks I spent up in Dublin, \nThree long weeks to learn nothing at all,\n Three long weeks I spent up in Dublin, \nLearning new steps for Lanigans Ball. \nShe stepped out and I stepped in again, \nI stepped out and she stepped in again, \nShe stepped out and I stepped in again, \nLearning new steps for Lanigans Ball. \nBoys were all merry and the girls they were hearty \nAnd danced all around in couples and groups, \nTil an accident happened, young Terrance McCarthy \nPut his right leg through miss Finnertys hoops. "
    "\nPoor creature fainted and cried Meelia murther, \nCalled for her brothers and gathered them all. \nCarmody swore that hed go no further \nTil he had satisfaction at Lanigans Ball. \nIn the midst of the row miss Kerrigan fainted, \nHer cheeks at the same time as red as a rose. \nSome of the lads declared she was painted, \nShe took a small drop too much, I suppose. \nHer sweetheart, Ned Morgan, so powerful and able, \nWhen he saw his fair colleen stretched out by the wall, \nTore the left leg from under the table \nAnd smashed all the Chaneys at Lanigans Ball. \nBoys, oh boys, twas then there were runctions. \nMyself got a lick from big Phelim McHugh. \nI soon replied to his introduction \nAnd kicked up a terrible hullabaloo. \nOld Casey, the piper, was near being strangled. \nThey squeezed up his pipes, bellows, chanters and all. \nThe girls, in their ribbons, they got all entangled \nAnd that put an end to Lanigans Ball."
)

corpus = data.lower().split("\n")

tokenizer.fit_on_texts(corpus)
total_words = len(tokenizer.word_index) + 1

print(tokenizer.word_index)
print(total_words)



# For every corpus line, collect each prefix of its token list that holds at
# least two tokens (tokens[:2], tokens[:3], ..., the whole line).
input_sequences = []
for line in corpus:
    token_list = tokenizer.texts_to_sequences([line])[0]
    input_sequences.extend(token_list[:end] for end in range(2, len(token_list) + 1))

# pad sequences
# Left-pad ('pre') every prefix to the length of the longest one.
max_sequence_len = max(len(seq) for seq in input_sequences)
input_sequences = np.array(
    pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')
)

# create predictors and label
# The last column is the word to predict; everything before it is the input.
xs = input_sequences[:, :-1]
labels = input_sequences[:, -1]

# One-hot encode the labels over the whole vocabulary.
ys = tf.keras.utils.to_categorical(labels, num_classes=total_words)



print(tokenizer.word_index['in'])
print(tokenizer.word_index['the'])
print(tokenizer.word_index['town'])
print(tokenizer.word_index['of'])
print(tokenizer.word_index['athy'])
print(tokenizer.word_index['one'])
print(tokenizer.word_index['jeremy'])
print(tokenizer.word_index['lanigan'])



print(xs[6])



print(ys[6])



print(xs[5])
print(ys[5])



print(tokenizer.word_index)



# Next-word model: embedding -> bidirectional LSTM -> softmax over the vocabulary.
# All statements sit at top level; the previous stray indentation after the
# first line was an IndentationError.
model = Sequential()
# Inputs are the padded sequences minus their last token, hence max_sequence_len-1.
model.add(Embedding(total_words, 64, input_length=max_sequence_len-1))
model.add(Bidirectional(LSTM(20)))
model.add(Dense(total_words, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
history = model.fit(xs, ys, epochs=500, verbose=1)



import matplotlib.pyplot as plt


def plot_graphs(history, string):
  """Plot a single training metric against the epoch number.

  Args:
    history: object whose ``history`` attribute maps metric names to
      per-epoch values (e.g. what ``model.fit`` returns).
    string: name of the metric to plot; also used as the y-axis label.
  """
  series = history.history[string]
  plt.plot(series)
  plt.xlabel("Epochs")
  plt.ylabel(string)
  plt.show()



# With metrics=['accuracy'] the history key is 'accuracy' on TF 2.x but 'acc'
# on TF 1.x; use whichever key this training run actually recorded instead of
# hard-coding 'acc' (a KeyError on TF 2.x).
acc_key = 'accuracy' if 'accuracy' in history.history else 'acc'
plot_graphs(history, acc_key)



# Generate text by repeatedly predicting the next word and appending it to the seed.
seed_text = "Laurence went to dublin"
next_words = 100

# index -> word lookup built once, instead of scanning word_index on every step.
index_to_word = {index: word for word, index in tokenizer.word_index.items()}

for _ in range(next_words):
    # Encode the text so far and left-pad it to the model's input length.
    token_list = tokenizer.texts_to_sequences([seed_text])[0]
    token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
    # Sequential.predict_classes has been removed from tf.keras; taking the
    # argmax over the softmax output gives the same class index.
    probabilities = model.predict(token_list, verbose=0)
    predicted = int(np.argmax(probabilities, axis=-1)[0])
    # An index without a word (e.g. 0) appends an empty string, as before.
    output_word = index_to_word.get(predicted, "")
    seed_text += " " + output_word
print(seed_text)



# Week 4 lesson 2

In [0]:



import tensorflow as tf

from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
import numpy as np

In [2]:

# Download the Irish lyrics corpus to /tmp/irish-lyrics-eof.txt.
# NOTE(review): --no-check-certificate disables TLS certificate verification — confirm it is still required.
!wget --no-check-certificate \
    https://storage.googleapis.com/laurencemoroney-blog.appspot.com/irish-lyrics-eof.txt \
    -O /tmp/irish-lyrics-eof.txt

--2019-05-26 17:39:23--  https://storage.googleapis.com/laurencemoroney-blog.appspot.com/irish-lyrics-eof.txt
Resolving storage.googleapis.com... 2a00:1450:4010:c0b::80, 173.194.222.128
Connecting to storage.googleapis.com|2a00:1450:4010:c0b::80|:443... connected.
WARNING: cannot verify storage.googleapis.com's certificate, issued by 'CN=Google Internet Authority G3,O=Google Trust Services,C=US':
  Unable to locally verify the issuer's authority.
HTTP request sent, awaiting response... 200 OK
Length: 68970 (67K) [text/plain]
Saving to: '/tmp/irish-lyrics-eof.txt'

/tmp/irish-lyrics-e 100%[=====================>]  67.35K  --.-KB/s   in 0.001s 

2019-05-26 17:39:23 (94.9 MB/s) - '/tmp/irish-lyrics-eof.txt' saved [68970/68970]

In [3]:

tokenizer = Tokenizer()

# Read the whole lyrics file; the context manager closes the handle even if
# reading fails, instead of leaving it open for the life of the kernel.
with open('/tmp/irish-lyrics-eof.txt') as lyrics_file:
    data = lyrics_file.read()

# One lower-cased lyric line per corpus entry.
corpus = data.lower().split("\n")

tokenizer.fit_on_texts(corpus)
# Word ids in word_index start at 1, so add one to make room for id 0.
total_words = len(tokenizer.word_index) + 1

print(tokenizer.word_index)
print(total_words)

{'harp': 366, 'hark': 1234, 'harm': 1907, 'defend': 1574, 'better': 704, 'hare': 1994, 'rats': 2391, 'sang': 599, 'sailed': 849, 'kilrain': 670, 'sent': 1004, 'drumslieve': 1376, 'joking': 2223, 'factory': 1528, 'send': 808, 'hired': 2010, 'til': 443, 'tin': 2327, 'lift': 2093, 'tie': 2406, 'hills': 331, 'speak': 1005, 'life': 191, 'desolation': 898, 'machine': 1586, 'tis': 162, 'pulled': 1711, 'divinity': 1630, 'green': 60, 'abusing': 2043, 'darlin': 741, 'dear': 78, 'deas': 557, 'black': 199, 'greet': 944, 'prouder': 2422, 'dead': 135, 'cats': 948, 'tabhair': 1671, 'cabin': 839, 'quitely': 2051, 'such': 349, 'lions': 1594, 'unprofaned': 1566, 'marks': 1299, 'london': 2479, 'trades': 2382, 'reap': 1984, 'lads': 496, 'mccorley': 358, 'sped': 2239, 'farrell': 578, 'pure': 365, 'each': 209, 'conversations': 2211, 'vigil': 1227, 'volunteers': 1576, 'moving': 1203, 'trust': 2349, 'fishin': 1766, 'garden': 579, 'though': 273, 'cheeks': 431, 'cheer': 2181, 'drown': 850, 'homes': 1404, 'cheek': 1698, 'hows': 2609, 'guinea': 2271, 'brand': 927, 'lark': 555, 'lips': 744, 'seasons': 1651, 'smother': 1983, 'rays': 709, 'clark': 1579, 'tenants': 2513, 'rows': 1800, 'mairis': 1885, 'grief': 539, 'california': 2228, 'one': 39, 'gad': 1641, 'mullingar': 1997, 'falls': 1917, 'seldom': 1392, 'farmstead': 2056, 'deprived': 2014, 'should': 224, 'bottles': 1107, 'although': 2457, 'sweetly': 745, 'auld': 574, 'och': 533, 'fly': 337, 'resolved': 1774, 'vogue': 2024, 'withdraw': 1952, 'made': 219, 'loo': 279, 'south': 977, 'valleys': 499, 'slanderin': 1436, 'haired': 2401, 'make': 196, 'cross': 594, 'rising': 780, 'low': 316, 'lot': 1918, 'passed': 197, 'hand': 129, 'loaded': 2591, 'foot': 669, 'sorry': 1915, 'drinking': 752, 'must': 146, 'jar': 1749, 'foots': 2545, 'aboard': 2028, 'singing': 306, 'gaily': 1884, 'gas': 1453, 'rakes': 788, 'likes': 1632, 'ceiling': 1839, 'most': 415, 'assisted': 1438, 'lines': 876, 'raked': 2417, 'softly': 1059, 'heaven': 2192, 'heavem': 2191, 'nay': 576, 
'sons': 301, 'foam': 1395, 'tread': 1521, 'heaved': 2166, 'engaging': 2448, 'song': 212, 'creel': 1895, 'replied': 1873, 'treat': 2682, 'pike': 755, 'strangely': 1255, 'margin': 2277, 'glisten': 1799, 'cockles': 1407, 'surrendered': 2248, 'beauing': 2496, 'bride': 436, 'whack': 547, 'john': 1012, 'shc': 1288, 'she': 14, 'steer': 1968, 'terrible': 1876, 'vale': 2531, 'says': 303, 'world': 153, 'narrow': 660, 'quigley': 2215, 'meadow': 945, 'steel': 1541, 'shy': 2101, 'peeled': 2332, 'slaughter': 938, 'isle': 562, 'countrie': 2583, 'nice': 320, 'woman': 300, 'help': 2338, 'tear': 528, 'approaching': 902, 'sic': 1462, 'cried': 1852, 'three': 241, 'brogue': 2022, 'instead': 2035, 'khaki': 1600, 'sir': 561, 'sip': 2491, 'bush': 700, 'itself': 1564, 'sit': 325, 'loveliness': 1562, 'scratch': 2088, 'today': 180, 'six': 1756, 'clung': 2064, 'to': 4, 'sweetheart': 1002, 'shed': 442, 'across': 427, 'unpaid': 2160, 'being': 553, 'awaken': 1239, 'shes': 183, 'started': 1078, 'diamond': 1108, 'springtime': 1210, 'wished': 2034, 'same': 387, 'hole': 2086, 'prime': 1218, 'north': 995, 'albert': 1007, 'will': 26, 'gate': 2339, 'grim': 2065, 'thing': 456, 'thine': 962, 'landed': 641, 'think': 285, 'noise': 2555, 'colour': 1328, 'unto': 1057, 'shedding': 2536, 'comes': 515, 'fetters': 851, 'banbridge': 2230, 'changeless': 1550, 'tho': 677, 'sunshine': 523, 'dreamt': 2222, 'dreams': 475, 'sea': 97, 'laughter': 785, 'the': 1, 'weather': 1112, 'thc': 2131, 'tones': 600, 'war': 683, 'frequent': 2628, 'thy': 185, 'entwine': 687, 'carried': 1300, 'wear': 786, 'yell': 1738, 'wan': 2300, 'deliver': 1024, 'learning': 1842, 'faith': 1568, 'walkin': 1019, 'satisfied': 873, 'weak': 1680, 'flower': 774, 'them': 59, 'then': 52, 'thee': 142, 'heartfrom': 2001, 'they': 29, 'voice': 317, 'leggins': 2404, 'mothers': 375, 'score': 2377, 'constant': 1551, 'sheriff': 2164, 'catchers': 1826, 'truth': 527, 'zoo': 2379, 'aisy': 1294, 'gun': 863, 'fate': 1091, 'sack': 2411, 'hands': 632, 'custom': 2631, 
'drawn': 722, 'wherever': 1371, 'porter': 1355, 'gardens': 679, 'red': 202, 'throughout': 2653, 'power': 1129, 'road': 160, 'kilrush': 1401, 'lambs': 1491, 'lord': 517, 'roam': 814, 'wife': 892, 'feet': 296, 'afraid': 2399, 'stranger': 1039, 'rolling': 1740, 'feel': 604, 'african': 1946, 'courting': 1047, 'beg': 719, 'bed': 613, 'bee': 1366, 'cursed': 856, 'sentry': 1764, 'crazy': 986, 'girls': 182, 'blades': 2619, 'bone': 1017, 'curses': 1306, 'gather': 1155, 'eer': 335, 'drinkin': 1158, 'slip': 2384, 'knew': 512, 'golden': 420, 'victory': 2193, 'guns': 530, 'ballygrant': 1377, 'meself': 753, 'belfast': 879, 'softest': 1360, 'lanigan': 1791, 'woods': 1073, 'sink': 2592, 'escape': 1250, 'swim': 906, 'enquiring': 2020, 'bounding': 1552, 'its': 75, 'reach': 2194, 'joyous': 2436, 'killarneys': 1975, 'foolish': 680, 'beside': 646, 'flow': 1191, 'wherein': 2151, 'dwell': 2152, 'brain': 1450, 'loses': 1964, 'rover': 407, 'morn': 414, 'proud': 346, 'railroad': 1769, 'more': 79, 'shoulders': 1781, 'together': 549, 'relations': 392, 'patrick': 1483, 'weary': 461, 'fervor': 1567, 'pray': 591, 'dearly': 2455, 'hummin': 1277, 'soil': 1509, 'tower': 1128, 'downfall': 2265, 'underfoot': 2617, 'customer': 1347, 'easily': 2572, 'siren': 1534, 'cozy': 1904, 'plying': 1188, 'church': 2343, 'milliner': 1808, 'join': 2526, 'nor': 484, 'first': 117, 'not': 54, 'now': 45, 'bower': 933, 'dawning': 385, 'moonshiny': 1447, 'above': 333, 'letters': 1003, 'bowed': 2444, 'goodbye': 2169, 'designed': 2487, 'boys': 228, 'sociable': 1787, 'twill': 868, 'which': 685, 'youve': 336, 'englands': 2611, 'chieftains': 937, 'gladness': 2418, 'weeks': 481, 'folk': 1249, 'larks': 2525, 'thinks': 2375, 'mchugh': 1872, 'spinning': 2538, 'charges': 2664, 'case': 1352, 'ld': 1272, 'springs': 1533, 'toil': 1428, 'freedom': 979, 'thinking': 356, 'li': 1273, 'courage': 825, 'hats': 2124, 'step': 554, 'until': 330, 'hopeless': 1367, 'battle': 2368, 'enchanting': 1494, 'laugh': 1180, 'gallon': 1106, 'finea': 1415, 
'shocking': 2388, 'odds': 2074, 'autumn': 2554, 'stroll': 2015, 'ferry': 1380, 'best': 229, 'bough': 1920, 'laughd': 2317, 'beat': 1532, 'saluted': 1980, 'faces': 2055, 'strangers': 991, 'ivy': 2550, 'shirts': 2367, 'tomorrow': 621, 'ive': 87, 'neither': 626, 'beam': 2318, 'spied': 2656, 'pillage': 1665, 'control': 1644, 'stranded': 2094, 'pull': 2613, 'dirty': 947, 'easy': 416, 'stretched': 1866, 'lrelands': 2205, 'blame': 1143, 'somebody': 2552, 'deceive': 1027, 'agin': 980, 'pail': 2285, 'forgive': 2639, 'pain': 827, 'riding': 2440, 'tempered': 1542, 'paid': 932, 'napper': 2607, 'sounds': 1192, 'easter': 2118, 'strangled': 1879, 'edward': 2096, 'counting': 1713, 'shrill': 2226, 'pair': 751, 'strayed': 1074, 'host': 2053, 'draw': 2669, 'gloom': 2603, 'rope': 2063, 'sinners': 1637, 'spend': 652, 'open': 843, 'shining': 280, 'sovereigns': 2632, 'shamrock': 1208, 'blackbird': 1900, 'lass': 1072, 'gesture': 1678, 'last': 155, 'heard': 134, 'loss': 1978, 'lost': 860, 'sayin': 516, 'rate': 1962, 'solemn': 1235, 'under': 737, 'lose': 1363, 'docks': 1535, 'erin': 294, 'odonnell': 1477, 'seeking': 1267, 'avoidin': 1649, 'outspread': 2322, 'window': 705, 'apple': 1719, 'compare': 1396, 'smiles': 1701, 'blouse': 2397, 'woe': 955, 'violin': 1324, 'smiled': 513, 'ball': 232, 'wedding': 351, 'inside': 1113, 'beating': 781, 'before': 223, 'swinging': 1157, 'showrs': 2315, 'quay': 1085, 'train': 1536, 'bloom': 698, 'creature': 981, 'boot': 1433, 'forgets': 1569, 'lie': 465, 'holy': 1231, 'vermin': 1484, 'sail': 1502, 'sorrow': 854, 'oats': 1291, 'stuff': 1448, 'lassies': 2004, 'lip': 667, 'within': 1374, 'fishing': 1357, 'apprenticed': 1340, 'hold': 1573, 'neatest': 631, 'smoke': 1135, 'aching': 2306, 'beggarman': 1170, 'patricks': 2606, 'shinin': 1750, 'need': 2452, 'losing': 2042, 'greek': 1627, 'tenant': 2249, 'whisky': 2360, 'sands': 1372, 'picture': 1783, 'fulfill': 2209, 'midst': 1859, 'slumber': 582, 'lived': 2218, 'bunch': 803, 'eden': 1413, 'often': 2356, 'lives': 893, 
'daughter': 277, 'ruler': 2197, 'credit': 2630, 'summers': 410, 'leaps': 2576, 'tide': 1499, 'moons': 2563, 'round': 150, 'shake': 2236, 'alone': 472, 'native': 910, 'along': 236, 'loves': 205, 'lover': 313, 'mild': 1727, 'heart': 43, 'belles': 925, 'hurled': 1307, 'whereon': 633, 'flag': 967, 'rally': 2180, 'loved': 237, 'rack': 2125, 'murther': 1854, 'pint': 1982, 'mounted': 2273, 'disquieted': 1248, 'swiftly': 1149, 'squall': 1426, 'divil': 607, 'fragrance': 1963, 'verdantly': 1565, 'bravely': 1089, 'fears': 2305, 'pheasants': 1403, 'townland': 1245, 'pigs': 2029, 'years': 243, 'roaming': 1385, 'praties': 2156, 'quality': 2016, 'yearn': 1705, 'counter': 1104, 'rowans': 1891, 'fishers': 2057, 'prince': 2095, 'thousand': 636, 'counted': 2658, 'lilting': 2645, 'gear': 2148, 'visit': 672, 'both': 433, 'shure': 790, 'landladys': 2633, 'wholl': 2601, 'glad': 2428, 'chamber': 2662, 'stars': 645, 'it': 25, 'is': 23, 'start': 1424, 'dublin': 151, 'huff': 1423, 'clumsy': 2111, 'if': 61, 'id': 168, 'laughing': 2005, 'winters': 872, 'im': 95, 'thems': 1668, 'fade': 682, 'fuel': 1940, 'sash': 1096, 'latinity': 1628, 'voices': 1152, 'veteran': 1575, 'charms': 958, 'jovial': 2210, 'delight': 154, 'theres': 80, 'store': 2626, 'platform': 2274, 'ship': 283, 'fathers': 315, 'distant': 1268, 'ruin': 1563, 'numbered': 1394, 'stands': 634, 'sell': 2449, 'summertime': 2687, 'evening': 393, 'melody': 2139, 'breast': 445, 'whats': 999, 'frisky': 2358, 'mornin': 1214, 'benburb': 1486, 'shades': 2532, 'promised': 1752, 'year': 2624, 'nlyme': 1219, 'bonny': 1053, 'top': 2480, 'airy': 1955, 'gathers': 1475, 'diamonds': 1336, 'flashed': 1097, 'rose': 102, 'doneen': 409, 'calls': 2381, 'quiet': 644, 'ringum': 1213, 'june': 1077, 'wink': 1809, 'spirit': 811, 'childhood': 1381, 'run': 421, 'rum': 1748, 'rub': 1165, 'mairi': 739, 'onward': 2238, 'unfurled': 1305, 'sheen': 2233, 'ten': 492, 'flew': 1760, 'treads': 2077, 'holly': 2557, 'slight': 2141, 'rock': 1278, 'support': 1390, 'done': 716, 
'flute': 1431, 'wrong': 2559, 'two': 302, 'dudeen': 1440, 'wide': 503, 'glens': 1359, 'danny': 521, 'waiters': 2501, 'eight': 2376, 'thru': 1175, 'bleak': 2162, 'twist': 1835, 'offer': 1606, 'chords': 2467, 'limbs': 1999, 'island': 1730, 'lily': 612, 'foreign': 2584, 'false': 2596, 'white': 174, 'phantom': 2321, 'joys': 802, 'consent': 747, 'encumbered': 1513, 'leaned': 2593, 'quarters': 2080, 'hardly': 715, 'painted': 1862, 'soldier': 1293, 'scruff': 1435, 'lilt': 2648, 'stray': 883, 'maids': 2686, 'yonder': 1069, 'guards': 1757, 'endless': 861, 'bright': 137, 'admiration': 1369, 'dwelling': 2185, 'sing': 186, 'uncle': 2361, 'passes': 2140, 'begging': 1171, 'crown': 690, 'sunflower': 1570, 'yore': 605, 'irish': 192, 'banter': 1933, 'covered': 2438, 'robin': 2523, 'over': 84, 'fondly': 959, 'grow': 526, 'saying': 218, 'sights': 1397, 'trod': 1093, 'granted': 2433, 'marchin': 1437, 'reilly': 919, 'bondage': 1669, 'bought': 1987, 'ours': 996, 'close': 965, 'warrior': 2462, 'liquor': 1354, 'tied': 1339, 'stack': 2083, 'chocolate': 1444, 'enemy': 1723, 'brooks': 1840, 'days': 171, 'anger': 1257, 'honor': 969, 'duns': 2503, 'slainte': 695, 'take': 62, 'moon': 165, 'hope': 373, 'mythology': 1634, 'lovers': 2578, 'any': 389, 'drink': 246, 'fashion': 1686, 'and': 2, 'affray': 2050, 'fourth': 1670, 'moor': 926, 'fine': 159, 'find': 147, 'travel': 909, 'halfway': 1416, 'britches': 2217, 'believe': 684, 'trees': 1146, 'drank': 1981, 'wrote': 1695, 'learnin': 1610, 'boots': 2115, 'youth': 681, 'sunday': 648, 'harmony': 1674, 'enamour': 1934, 'gaze': 900, 'walking': 2272, 'breeze': 1466, 'coal': 2442, 'bring': 504, 'finger': 1207, 'sentence': 888, 'soars': 2431, 'shapes': 928, 'cleared': 2052, 'lusty': 1221, 'odaly': 1807, 'tease': 1953, 'damer': 1945, 'coat': 756, 'nigh': 2045, 'souls': 622, 'when': 21, 'majestic': 1500, 'freebirds': 2291, 'indies': 1935, 'mash': 2329, 'shines': 2354, 'flowers': 401, 'wee': 891, 'wed': 766, 'a': 5, 'angels': 807, 'single': 1954, 'acres': 1796, 
'wet': 2389, 'city': 507, 'aroon': 953, 'useless': 1959, 'wooden': 2127, 'get': 122, 'enemies': 1511, 'chirping': 2556, 'wishful': 1638, 'rocky': 225, 'gem': 2242, 'ofttimes': 2640, 'sky': 510, 'fellows': 396, 'teacher': 1620, 'lofty': 914, 'numbers': 1758, 'starved': 1666, 'parents': 824, 'remorseless': 2091, 'tyrant': 2090, 'thorn': 899, 'clothes': 615, 'somewhere': 2105, 'kellswater': 643, 'blind': 2539, 'oneill': 1476, 'lack': 2142, 'venture': 983, 'avick': 1640, 'grave': 988, 'glance': 2569, 'his': 40, 'hit': 2245, 'cloak': 2283, 'wheel': 292, 'musha': 546, 'courtesy': 1927, 'hid': 2324, 'mccarthy': 1849, 'pianos': 2121, 'appear': 886, 'sportin': 2683, 'delirium': 1261, 'him': 115, 'iron': 1302, 'primrose': 1365, 'loch': 1503, 'rogue': 2021, 'em': 1026, 'verdant': 2580, 'miles': 2229, 'rare': 1061, 'suppose': 1863, 'distance': 834, 'street': 293, 'lamenting': 2312, 'lullaby': 1275, 'bollin': 2685, 'door': 244, 'grandmother': 1198, 'glossy': 2284, 'havent': 1018, 'mcgilligan': 1811, 'toast': 1898, 'yere': 1733, 'grandest': 1643, 'drowsily': 2540, 'rig': 1745, 'rie': 1287, 'o': 67, 'keep': 204, 'stirring': 2546, 'someone': 1115, 'awaitin': 2130, 'hearty': 734, 'keen': 2075, 'hearts': 575, 'fear': 469, 'bit': 609, 'other': 500, 'july': 2231, 'cot': 603, 'din': 2149, 'since': 231, 'big': 975, 'bid': 525, 'month': 869, 'cruel': 450, 'sprightly': 2547, 'earthly': 1237, 'changing': 2263, 'barrow': 658, 'color': 455, 'dig': 2085, 'riches': 1067, 'times': 254, 'penny': 2659, 'swan': 2145, 'mans': 1103, 'goggles': 2407, 'devils': 1754, 'many': 242, 'glory': 2340, 'jury': 1350, 'broad': 659, 'shoving': 2558, 'kept': 1163, 'grand': 699, 'everywhere': 2114, 'lying': 1518, 'bullet': 1589, 't': 1544, 'mind': 235, 'mine': 332, 'place': 238, 'begin': 2107, 'fighting': 1009, 'trade': 880, 'shannon': 1501, 'ground': 253, 'captive': 1930, 'provost': 1624, 'away': 56, 'wexford': 674, 'pipe': 1054, 'dom': 1672, 'spancil': 452, 'dow': 1167, 'killarney': 598, 'ould': 473, 'sends': 
954, 'daylight': 1081, 'father': 100, 'stepped': 270, 'brow': 1006, 'brian': 1480, 'crowds': 2207, 'sweeter': 946, 'excise': 572, 'body': 1117, 'mooney': 1008, 'weeping': 826, 'redeem': 2161, 'bacon': 1815, 'couples': 1844, 'flying': 2219, 'have': 72, 'corner': 2385, 'ructions': 1801, 'let': 118, 'beware': 798, 'cap': 2282, 'theyre': 439, 'suffer': 1335, 'damsel': 885, 'macdonagh': 1580, 'slipped': 1348, 'forget': 464, 'gilgarra': 1211, 'grows': 399, 'wailing': 835, 'creeping': 1225, 'cure': 2003, 'great': 343, 'boatsman': 1379, 'happy': 405, 'gentle': 696, 'horror': 1598, 'deep': 794, 'receiver': 2657, 'laughs': 1706, 'collar': 2405, 'rigs': 2030, 'amiable': 1929, 'whistle': 2262, 'consumed': 1965, 'locality': 2017, 'listening': 2535, 'mountain': 138, 'rent': 758, 'feegee': 1445, 'thady': 2423, 'hardship': 1251, 'smile': 203, 'those': 419, 'begged': 1265, 'be': 30, 'blood': 519, 'tender': 665, 'by': 28, 'serve': 1331, 'laughters': 2646, 'flowing': 2316, 'l\xc3\xa1mh': 1673, 'going': 256, 'trinity': 1625, 'sod': 2615, 'landlord': 2163, 'haste': 706, 'armless': 1734, 'neer': 167, 'spoke': 593, 'crystal': 1195, 'rosy': 630, 'music': 972, 'people': 411, 'pale': 501, 'plenty': 281, 'ross': 1505, 'palm': 2286, 'carriages': 1033, 'night': 65, 'decay': 457, 'ballyjamesduff': 341, 'ware': 1961, 'with': 17, 'tenderest': 1619, 'warm': 647, 'hover': 1233, 'asunder': 2468, 'reel': 732, 'sheep': 2157, 'swearing': 1741, 'cho': 1616, 'waters': 2524, 'recall': 1909, 'skreen': 2582, 'raising': 2516, 'right': 395, 'slime': 1317, 'crying': 661, 'wishing': 2517, 'fists': 1031, 'linnets': 2650, 'vengeance': 2059, 'load': 2047, 'soothed': 1925, 'raise': 567, 'blithe': 2000, 'wish': 220, 'that': 15, 'returning': 2625, 'than': 176, 'leap': 2110, 'seven': 887, 'brogues': 1988, 'waiting': 563, 'reflections': 1382, 'money': 268, 'renown': 1601, 'sister': 1976, 'tuned': 1919, 'none': 408, 'windswept': 2296, 'mallow': 360, 'butcher': 2335, 'shawl': 2250, 'tea': 1816, 'goes': 178, 'morrow': 
746, 'forefathers': 1603, 'cries': 1667, 'vision': 769, 'oil': 1456, 'banks': 284, 'wander': 602, 'saxon': 1507, 'threw': 923, 'felt': 829, 'short': 875, 'meeting': 1690, 'gently': 585, 'led': 610, 'fell': 355, 'leg': 718, 'gown': 2251, 'shore': 376, 'lead': 976, 'required': 2012, 'barley': 258, 'reminded': 1222, 'just': 169, 'cail\xc3\xadn': 556, 'colleen': 329, 'cloud': 2301, 'sunbeam': 2522, 'table': 845, 'hadnt': 1793, 'stick': 534, 'magee': 1434, 'ray': 1548, 'would': 83, 'dewy': 2586, 'priests': 1605, 'energy': 2154, 'faithful': 2466, 'cavalry': 1138, 'wood': 1779, 'flaxen': 2400, 'impropriety': 1614, 'beguile': 2643, 'ale': 2627, 'end': 508, 'fill': 498, 'all': 12, 'farming': 1358, 'enough': 918, 'stealing': 812, 'tears': 314, 'rosie': 514, 'union': 1595, 'high': 123, 'career': 2246, 'flood': 2309, 'good': 94, 'mary': 128, 'currabawn': 2387, 'bishop': 1658, 'buttoned': 2137, 'dust': 817, 'swings': 1206, 'brightest': 1708, 'into': 190, 'ones': 276, 'resound': 1514, 'eyes': 107, 'theyd': 1916, 'writin': 724, 'brightly': 2564, 'humble': 1707, 'peeping': 2412, 'cans': 2328, 'soared': 2319, 'troubles': 1679, 'jigs': 2031, 'mooncoin': 1190, 'julia': 1832, 'blackwater': 1487, 'liverpool': 2037, 'october': 1966, 'coast': 1398, 'cease': 997, 'buried': 859, 'lakes': 1497, 'n': 1789, 'lick': 1870, 'grove': 1205, 'found': 347, 'hosannahs': 1515, 'dublins': 1406, 'resemble': 1109, 'beneath': 757, 'dont': 214, 'could': 103, 'return': 616, 'betrays': 2464, 'parted': 1244, 'weel': 1897, 'rough': 1464, 'roddy': 357, 'sounded': 1820, 'meelia': 1853, 'ride': 1498, 'weep': 1334, 'cork': 2679, 'startin': 2483, 'playboy': 1654, 'corn': 446, 'waitin': 2494, 'crew': 1132, 'windows': 2498, 'passing': 2268, 'ghosts': 866, 'blaze': 1604, 'sober': 908, 'died': 201, 'farmers': 773, 'shoulder': 878, 'sunny': 2518, 'lamp': 1948, 'babes': 2290, 'sweetest': 968, 'clouded': 1263, 'smokey': 1538, 'climb': 2587, 'cherry': 2441, 'pots': 2326, 'broke': 1122, 'streams': 1405, 'free': 206, 
'searching': 2333, 'bottle': 2337, 'below': 2081, 'simply': 1523, 'slowly': 1189, 'barney': 2331, 'pat': 1577, 'simple': 1270, 'deepest': 1378, 'rise': 1058, 'wonders': 1368, 'sitting': 1159, 'powerful': 1865, 'cry': 474, 'fail': 1522, 'neck': 1092, 'borne': 1465, 'growin': 2620, 'fair': 69, 'land': 93, 'diggin': 1184, 'mellow': 1187, 'painful': 1694, 'army': 2677, 'sighed': 619, 'lattice': 1204, 'falling': 2026, 'arms': 215, 'praise': 587, 'climate': 1318, 'charmin': 1607, 'peek': 2392, 'turns': 688, 'heres': 288, 'companions': 2430, 'chains': 852, 'drowsy': 809, 'wouldst': 1560, 'treated': 1780, 'endearing': 1557, 'do': 181, 'groups': 1845, 'coming': 1174, 'de': 379, 'drums': 1014, 'da': 354, 'kathleen': 541, 'sailing': 1065, 'december': 1123, 'runctions': 1869, 'eye': 543, 'laity': 1661, 'want': 728, 'higher': 1776, 'ink': 1389, 'around': 263, 'allegators': 1778, 'chanters': 1882, 'boo': 1176, 'noon': 1064, 'afford': 1936, 'generation': 978, 'treasures': 2325, 'oflynn': 532, 'form': 782, 'boy': 194, 'prowling': 1531, 'fleet': 960, 'checking': 1645, 'bow': 116, 'fore': 1430, 'barrin': 1443, 'didnt': 480, 'market': 2453, 'deceiver': 1744, 'banish': 1066, 'snow': 289, 'sinking': 447, 'gruff': 1418, 'dozen': 1105, 'dream': 367, 'please': 463, 'spent': 255, 'give': 193, 'anguish': 2171, 'sunset': 1775, 'root': 2616, 'row': 266, 'stones': 1388, 'neagh': 1504, 'room': 550, 'presence': 815, 'roof': 1126, 'roe': 1489, 'pride': 768, 'nature': 2486, 'rob': 2588, 'know': 121, 'jest': 989, 'thayology': 1635, 'miss': 1048, 'mist': 828, 'falter': 2443, 'fifty': 2355, 'links': 1512, 'whispering': 1419, 'fields': 230, 'whirring': 2544, 'forced': 1140, 'mccree': 2485, 'march': 2054, 'minute': 1803, 'wandered': 778, 'town': 71, 'filled': 2665, 'sleeping': 583, 'guril': 1461, 'our': 63, 'toes': 1011, 'cock': 2225, 'out': 68, 'kneel': 1519, 'gave': 141, 'bowl': 1739, 'things': 801, 'awakened': 1755, 'sound': 451, 'he': 33, 'famous': 1626, 'treasure': 1063, 'twisted': 1696, 
'possessed': 1262, 'fashions': 2484, 'sword': 770, 'swore': 552, 'sailor': 804, 'bath': 2482, 'mossy': 673, 'sanctified': 1510, 'stay': 592, 'solace': 1926, 'star': 297, 'workin': 2473, 'farthing': 1971, 'toome': 308, 'pistols': 545, 'array': 2066, 'faster': 2500, 'colors': 2492, 'yeoman': 1137, 'longs': 2567, 'dreaming': 1131, 'munroe': 1490, 'bridge': 262, 'been': 338, 'chickenless': 1736, 'frame': 2174, 'sorely': 1912, 'games': 1572, 'gallant': 942, 'assembled': 2208, 'safely': 2038, 'beer': 1080, 'charming': 935, 'moved': 2143, 'fond': 542, 'kinds': 1828, 'come': 51, 'comb': 1712, 'friend': 762, 'words': 1148, 'moves': 2146, 'horse': 477, 'home': 89, 'spotted': 2394, 'say': 125, 'silent': 2427, 'saw': 210, 'sat': 388, 'sighing': 2553, 'introduction': 1874, 'salley': 678, 'boil': 2040, 'journey': 819, 'alladin': 1949, 'sad': 651, 'little': 112, 'shakes': 2565, 'on': 16, 'leane': 2279, 'oh': 36, 'famine': 2294, 'cold': 221, 'memory': 743, 'ringlets': 729, 'of': 6, 'taras': 1821, 'fool': 2039, 'field': 950, 'gay': 391, 'eileen': 429, 'shaken': 589, 'danger': 1970, 'or': 81, 'an': 113, 'am': 216, 'wealth': 1944, 'ah': 505, 'brother': 1141, 'claret': 789, 'ay': 1283, 'opened': 844, 'winging': 1236, 'listen': 1798, 'at': 38, 'as': 20, 'sight': 2472, 'bells': 479, 'reason': 1125, 'way': 164, 'friends': 208, 'was': 13, 'again': 90, 'punch': 1813, 'lilies': 2529, 'cheerily': 2542, 'glow': 1549, 'ireland': 239, 'toomebridge': 2076, 'minstrel': 586, 'fashioned': 2408, 'rapier': 1212, 'neath': 2521, 'man': 119, 'moaning': 1153, 'purer': 1373, 'lays': 2566, 'judge': 494, 'mad': 1838, 'tuning': 2419, 'may': 136, 'hear': 85, 'look': 298, 'thou': 76, 'lure': 1903, 'cootehill': 1417, 'casement': 2561, 'drunk': 1391, 'drop': 714, 'cask': 1805, 'fainted': 735, 'heavens': 697, 'shelah': 1168, 'songs': 731, 'pipers': 1179, 'donegal': 1622, 'thats': 170, 'pipes': 422, 'piper': 1878, 'try': 2254, 'leaving': 822, 'wearin': 795, 'calico': 2396, 'irelands': 2243, 'pretty': 187, 'canal': 
1527, 'truly': 964, 'dolans': 1818, 'likewise': 2668, 'trudged': 2363, 'ringin': 1451, 'hogshead': 2082, 'poor': 189, 'kissed': 471, 'bubbling': 2032, 'hanging': 1119, 'shook': 993, 'pepper': 1021, 'heed': 2189, 'd': 1631, 'without': 482, 'shoot': 2672, 'strife': 998, 'throbbing': 2323, 'heel': 1052, 'soul': 166, 'mollys': 1762, 'wobblin': 2019, 'few': 874, 'banner': 2070, 'footmen': 2667, 'crooning': 1199, 'fled': 2426, 'pardon': 2636, 'flows': 1070, 'flowr': 1615, 'courtin': 1036, 'wind': 323, 'wine': 846, 'sick': 1393, 'wont': 438, 'youths': 2654, 'sweethearts': 1056, 'returned': 1785, 'dim': 818, 'countin': 1022, 'whisperings': 1442, 'swamps': 1772, 'doubts': 2304, 'cares': 865, 'forth': 1023, 'passerby': 2241, 'small': 1049, 'derry': 618, 'odd': 1759, 'misfortune': 1342, 'forty': 1316, 'date': 1960, 'bantry': 2237, 'behind': 211, 'myself': 730, 'bide': 1516, 'further': 1857, 'linnet': 1902, 'while': 109, 'fol': 378, 'declared': 1861, 'stole': 309, 'mourne': 1185, 'coo': 2520, 'brings': 800, 'their': 57, 'cow': 1921, 'widout': 1613, 'keeping': 810, 'did': 53, 'die': 175, 'calling': 522, 'beguiled': 1015, 'fairest': 740, 'can': 73, 'having': 2511, 'skin': 2413, 'politicians': 2506, 'foe': 2092, 'goods': 2147, 'against': 495, 'skip': 1492, 'square': 2247, 'selling': 1345, 'once': 152, 'jenny': 571, 'fiddles': 2132, 'stainless': 2069, 'rattled': 1742, 'myrtle': 1887, 'flame': 1938, 'son': 767, 'twined': 1697, 'wains': 1303, 'save': 663, 'almanack': 1958, 'wings': 907, 'whiskey': 397, 'slung': 2461, 'nothing': 437, 'fades': 2600, 'botany': 2298, 'complexions': 2488, 'whispers': 921, 'cant': 490, 'ave': 1520, 'blow': 693, 'hiii': 2221, 'basin': 2378, 'ringing': 462, 'chain': 956, 'nut': 775, 'diddle': 841, 'remain': 934, 'chair': 2126, 'watched': 394, 'waxes': 2599, 'rich': 702, 'playing': 970, 'bedim': 1699, 'cool': 1194, 'daughters': 2206, 'pistol': 2670, 'dancers': 1118, 'enthralled': 1914, 'banished': 1833, 'funds': 2507, '1803': 1297, 'captain': 692, 'advance': 
1612, 'raindrops': 2390, 'tried': 538, 'hill': 311, 'middle': 608, 'near': 274, 'rollin': 1034, 'someones': 2123, 'neat': 640, 'least': 1183, 'c\xef\xbf\xbdta': 2176, 'gray': 1824, 'clouds': 1529, 'caubeen': 2623, 'leaning': 1546, 'fairly': 1169, 'full': 952, 'prison': 493, 'phil': 922, 'apron': 2395, 'couldnt': 2671, 'noiselessly': 2543, 'abandon': 2153, 'there': 41, 'carry': 1016, 'girded': 2460, 'rungum': 2661, 'brave': 299, 'lazy': 987, 'why': 344, 'gaiety': 1660, 'indeed': 1729, 'sacked': 1554, 'yearly': 2451, 'light': 226, 'looks': 2447, 'colonel': 440, 'nights': 905, 'motion': 1471, 'beaming': 1181, 'care': 435, 'living': 2509, 'goin': 544, 'bade': 2168, 'sow': 2474, 'fierce': 1473, 'shannons': 2302, 'house': 380, 'tom': 973, 'ruby': 1913, 'makes': 386, 'anchor': 2203, 'stream': 708, 'bragh': 348, 'clear': 370, 'cr\xc3\xbaite': 558, 'guineas': 1182, 'manys': 2224, 'connolly': 1584, 'also': 1951, 'invitation': 1802, 'logic': 1633, 'fixed': 1556, 'tags': 2403, 'martha': 894, 'seemd': 2307, 'fire': 428, 'trial': 1351, 'creeds': 1682, 'ogradys': 1819, 'elf': 2235, 'gone': 124, 'jollity': 1653, 'ceannt': 1593, 'among': 1082, 'early': 139, 'owen': 1488, 'axe': 1540, 'arrived': 1044, 'nelly': 1823, 'guardian': 1224, 'rink': 2414, 'darkning': 1703, 'bogs': 1990, 'bare': 765, 'bard': 2463, 'oer': 92, 'shone': 877, 'prisoner': 1030, 'boneless': 1735, 'county': 453, 'stupendous': 1468, 'lowly': 2573, 'what': 163, 'joy': 502, 'stool': 577, 'curious': 2006, 'boreen': 2232, 'stood': 777, 'own': 132, 'alas': 368, 'soft': 364, 'devil': 384, 'old': 34, 'bond': 1321, 'stony': 2170, 'chanced': 805, 'hair': 177, 'kilkenny': 653, 'bore': 1102, 'stagger': 2084, 'clyde': 1332, 'shell': 497, 'chanting': 1922, 'bran': 2330, 'watch': 584, 'stone': 901, 'hillways': 1886, 'aloft': 2320, 'danced': 383, 'soar': 1899, 'homeward': 2144, 'change': 686, 'connemara': 2334, 'dress': 2253, 'jeremy': 1790, 'brothers': 595, 'heather': 1215, 'smiling': 359, 'others': 1768, 'peggy': 1810, 'dew': 
434, 'taxes': 2159, 'sheeps': 2255, 'came': 108, 'asked': 304, 'remark': 990, 'part': 759, 'wrath': 2311, 'pricked': 2598, 'ties': 1770, 'frightened': 821, 'seen': 157, 'between': 1028, 'arose': 1295, 'shortly': 2202, 'bread': 2353, 'piety': 1611, 'mistaken': 1761, 'answer': 540, 'break': 478, 'squeezed': 1880, 'reported': 1386, 'darling': 172, 'wondering': 2641, 'fiddlin': 2099, 'who': 106, 'macdiarmada': 1581, 'caused': 882, 'every': 345, 'hunt': 1993, 'grieve': 1375, 'birds': 350, 'sweeps': 2495, 'doing': 1827, 'seems': 2649, 'hung': 1338, 'married': 381, 'antrim': 1095, 'creole': 548, 'doodle': 2415, 'omalley': 897, 'worsened': 1260, 'ring': 711, 'meaning': 884, 'learn': 1843, 'dinner': 2383, 'fall': 551, 'slept': 1172, 'battered': 1792, 'craw': 1591, 'sake': 1890, 'soldiers': 611, 'crossroads': 2252, 'peoples': 1681, 'mccann': 1134, 'upon': 334, 'laid': 321, 'asleep': 1279, 'fray': 754, 'i': 3, 'parlay': 2458, 'huggin': 1276, 'roving': 488, 'eily': 2421, 'accident': 1846, 'preacher': 1618, 'news': 2604, 'gathered': 1855, 'ill': 37, 'farewell': 725, 'name': 449, 'write': 635, 'bridgit': 896, 'holds': 1700, 'seeing': 2172, 'remind': 2612, 'puts': 2570, 'jail': 723, 'straight': 924, 'twas': 261, 'winter': 2439, 'honest': 2374, 'traveled': 2079, 'youd': 638, 'guff': 1454, 'scattered': 1555, 'river': 951, 'your': 24, 'nonsensical': 1829, 'sails': 2590, 'passion': 1685, 'herring': 1892, 'malone': 915, 'friendly': 1111, 'guide': 1687, 'hours': 459, 'wall': 426, 'walk': 994, 'paddy': 413, 'polkas': 1830, 'clay': 1325, 'late': 339, 'hour': 1588, 'looked': 342, 'streets': 917, 'placed': 1585, 'coaxin': 1646, 'pleasures': 1356, 'dove': 2220, 'fish': 1409, 'forever': 278, 'coolin': 2560, 'rings': 529, 'dripping': 1361, 'morgan': 1864, 'wild': 105, 'kissd': 2314, 'beautiful': 418, 'true': 74, 'bowlin': 1767, 'able': 319, 'lovely': 198, 'crossings': 1771, 'mountains': 290, 'poisoning': 1592, 'roved': 2579, 'pity': 1062, 'bloomed': 2280, 'plundering': 1664, 'jewels': 1663, 
'long': 101, 'sailors': 2120, 'whenever': 890, 'off': 372, 'aisey': 1164, 'dying': 424, 'tore': 467, 'stand': 287, 'tory': 1508, 'death': 491, 'ribbons': 1051, 'christmas': 1296, 'oft': 836, 'frivolity': 1650, 'londons': 2471, 'sulloon': 1731, 'gets': 1721, 'thoughts': 855, 'whose': 1485, 'onaisy': 1647, 'dawn': 1150, 'cornwall': 2266, 'enlisted': 2362, 'hush': 1274, 'honey': 840, 'maidens': 1217, 'boat': 2589, 'tight': 2675, 'fireside': 2260, 'magic': 564, 'fading': 1559, 'tempest': 1474, 'connaught': 1084, 'awoke': 2227, 'joined': 2049, 'cliffs': 506, 'answered': 606, 'forbid': 2605, 'erins': 1086, 'bouys': 2036, 'flavours': 1220, 'children': 694, 'barn': 2386, 'here': 425, 'stationed': 2678, 'struggle': 823, 'du': 1747, 'crossed': 949, 'temper': 2041, 'reels': 2574, 'bank': 2288, 'women': 400, 'play': 251, 'forgot': 1710, 'setting': 2122, 'betray': 2270, 'powr': 2642, 'englishmen': 1599, 'boyhood': 1383, 'ned': 1050, 'cream': 2489, 'call': 328, 'juice': 1035, 'potatoes': 1046, 'new': 188, 'surely': 776, 'coins': 1751, 'through': 88, 'di': 1284, 'shelter': 2188, 'travelled': 2244, 'swear': 2681, 'whom': 2281, 'carmody': 1856, 'shells': 848, 'vanished': 1414, 'no': 35, 'bluff': 1459, 'iull': 1691, 'steal': 799, 'na': 559, 'rusty': 1301, 'summer': 570, 'rattling': 1989, 'so': 32, 'took': 114, 'warning': 1353, 'knock': 1714, 'fare': 637, 'flock': 1642, 'grant': 1941, 'fisted': 2676, 'farm': 1795, 'pound': 1794, 'mermaids': 1472, 'claim': 1655, 'drove': 1481, 'how': 267, 'crowded': 1247, 'tbe': 2135, 'wheat': 2475, 'kilkee': 1400, 'grew': 2292, 'babies': 1422, 'fills': 2420, 'follol': 1995, 'cockle': 2602, 'glimpse': 1253, 'bacchus': 2504, 'played': 691, 'pocket': 1209, 'yellow': 2104, 'fearless': 1088, 'marching': 2071, 'followed': 2201, 'follow': 1343, 'frightning': 1991, 'bonnet': 2445, 'tune': 486, 'echo': 2429, 'childer': 1639, 'holes': 1173, 'yer': 1432, 'yet': 369, 'four': 536, 'entangled': 1883, 'forgiveness': 1266, 'irishmen': 675, 'dhu': 783, 'throw': 2614, 
'like': 64, 'strolling': 1344, 'lad': 712, 'steps': 733, 'past': 597, 'lazily': 2571, 'pass': 806, 'bellows': 1881, 'sligo': 1765, 'daisy': 2365, 'lay': 847, 'child': 581, 'baby': 2346, 'law': 625, 'grass': 259, 'wines': 2634, 'thrush': 1055, 'meet': 179, 'braes': 2581, 'written': 2106, 'making': 831, 'had': 82, 'maintain': 1460, 'tumblin': 1718, 'has': 158, 'hat': 1116, 'lilacs': 1000, 'beauty': 402, 'lonely': 312, 'shall': 120, 'pleasant': 2198, 'knows': 1329, 'known': 963, 'else': 748, 'graveyard': 2345, 'smashed': 1867, 'rode': 1312, 'shalt': 1238, 'down': 44, 'shillelagh': 2044, 'ere': 2577, 'dungannon': 518, 'ready': 2666, 'doesnt': 629, 'melting': 1384, 'begotten': 2505, 'desire': 1942, 'where': 47, 'comrades': 322, 'remember': 1114, 'raw': 483, 'rat': 1825, 'head': 761, 'roses': 250, 'ran': 466, 'ral': 601, 'born': 511, 'rah': 1996, 'mither': 1269, 'rag': 1571, 'love': 22, 'lovd': 1151, 'ponchartrain': 1037, 'sigh': 2167, 'quart': 2336, 'wit': 1908, 'rest': 535, 'courted': 895, 'roared': 830, 'far': 144, 'tuam': 1979, 'alive': 340, 'morning': 104, 'declining': 2530, 'air': 520, 'noiseless': 2575, 'move': 1124, 'ago': 377, 'eat': 1292, 'gangs': 2476, 'slew': 1725, 'slopes': 912, 'wonder': 662, 'bird': 742, 'hath': 1098, 'coffee': 2097, 'for': 10, 'touch': 2129, 'cannot': 403, 'caress': 2638, 'whatever': 2195, 'dressed': 1145, 'finnertys': 1850, 'foemans': 1099, 'croft': 1525, 'milking': 1060, 'robbin': 2673, 'rights': 2465, 'till': 130, 'van': 623, 'stuck': 1590, 'dresses': 2481, 'adieu': 1041, 'tell': 265, 'chaneys': 1868, 'heavenward': 816, 'renownd': 1609, 'shoes': 1121, 'vow': 930, 'hail': 1717, 'liffey': 941, 'nolans': 1817, 'quite': 318, 'water': 233, 'chop': 1543, 'might': 248, 'nearly': 1724, 'stile': 1160, 'talk': 1623, 'dub': 1166, 'still': 99, 'spring': 524, 'holyhead': 2033, 'rostrevor': 1495, 'murray': 1715, 'bravery': 1319, 'gain': 2454, 'something': 2018, 'landlady': 2629, 'croppy': 1144, 'breaks': 2527, 'genie': 1950, 'some': 127, 'bound': 
382, 'swell': 1308, 'newborn': 1421, 'used': 476, 'unseen': 2178, 'view': 655, 'reached': 2597, 'athenry': 779, 'stern': 2179, 'sort': 1446, 'almost': 2013, 'cottage': 1786, 'plight': 1259, 'went': 173, 'after': 771, 'fought': 374, 'doves': 2519, 'fame': 1602, 'only': 143, 'precious': 580, 'luck': 1349, 'thundering': 1470, 'plow': 2259, 'shine': 1193, 'scarce': 1924, 'meadows': 870, 'policeman': 1458, 'stop': 664, 'yon': 2689, 'smelled': 1537, 'bothered': 2371, 'courteous': 2446, 'kerrigan': 1860, 'fast': 838, 'lady': 1177, 'rust': 2258, 'beginning': 2537, 'dropped': 2373, 'kind': 260, 'king': 1130, 'command': 1677, 'leisure': 1910, 'fountain': 1196, 'gasworks': 1524, 'hoops': 1851, 'tree': 257, 'shadow': 676, 'adored': 1561, 'paddys': 2002, 'torn': 2072, 'saber': 1743, 'kerry': 1178, 'loving': 460, 'turning': 2213, 'guard': 1139, 'molly': 291, 'sleep': 458, 'marble': 1387, 'ban': 2058, 'left': 98, 'flesh': 1732, 'casey': 234, 'bag': 2409, 'bad': 881, 'back': 111, 'hobble': 2046, 'path': 2310, 'girl': 148, 'bay': 531, 'winds': 627, 'bar': 1346, 'pate': 2370, 'venus': 2287, 'coaxing': 2234, 'kindliest': 1621, 'clearly': 842, 'in': 8, 'bosom': 2364, 'mother': 161, 'expire': 1943, 'lave': 1659, 'legs': 1728, 'naught': 1127, 'awhile': 1911, 'skibbereen': 565, 'gifts': 1558, 'cattle': 2158, 'longing': 1243, 'wait': 2493, 'trusty': 2595, 'laws': 2618, 'dogs': 1992, 'drew': 720, 'takes': 797, 'terrance': 1848, 'taken': 275, 'standing': 763, 'distressful': 2610, 'mingling': 2308, 'graceful': 2112, 'destroy': 1676, 'lucan': 1478, 'danes': 1482, 'lets': 1683, 'corporal': 271, 'welcome': 352, 'mavrone': 1837, 'vowed': 1753, 'divils': 1629, 'ye': 96, 'five': 858, 'real': 2651, 'twinkle': 2647, 'young': 48, 'thank': 2351, 'goblins': 1986, 'lassie': 614, 'remarkable': 1457, 'roamin': 2680, 'course': 2663, 'sits': 2261, 'peat': 1894, 'matters': 2293, 'kilgary': 1020, 'returnd': 2303, 'melodious': 1923, 'tailor': 2214, 'glass': 390, 'befall': 2196, 'country': 468, 'comfort': 1068, 
'wonderful': 985, 'spins': 1201, 'teasing': 2515, 'moonlight': 1197, 'everybody': 2380, 'bidding': 2352, 'oak': 2594, 'breaking': 1076, 'time': 133, 'maid': 145, 'expressed': 2478, 'main': 957, 'board': 2200, 'hapless': 2060, 'rambling': 2199, 'very': 286, 'roll': 1469, 'hall': 1822, 'twould': 2008, 'half': 760, 'choose': 1974, 'drives': 2510, 'sheilings': 1889, 'wintry': 2103, 'hell': 857, 'held': 837, 'tandy': 2608, 'ranks': 2459, 'feeble': 2173, 'knocked': 1032, 'strings': 2134, 'sets': 903, 'prove': 1001, 'set': 412, 'hello': 2102, 'sustaining': 1252, 'glamour': 1452, 'see': 77, 'whisper': 1455, 'marry': 1040, 'vain': 1784, 'earnest': 2067, 'needs': 1684, 'silver': 654, 'gazed': 2240, 'passage': 1463, 'story': 966, 'wives': 853, 'william': 2269, 'watching': 1402, 'spending': 1186, 'build': 1216, 'revenge': 2182, 'covers': 2136, 'raking': 787, 'dale': 1226, 'revealing': 1229, 'told': 666, 'ache': 1693, 'adoration': 2434, 'monger': 1410, 'god': 310, 'tralee': 791, 'tipped': 1043, 'fever': 832, 'wisdom': 2184, 'prodigal': 2637, 'crow': 1420, 'got': 247, 'plain': 657, 'lingers': 2568, 'ladies': 1045, 'said': 55, 'lake': 727, 'from': 27, 'strains': 1232, 'bent': 568, 'tapping': 2549, 'ghost': 1411, 'haunt': 1370, 'sport': 1327, 'leaves': 489, 'men': 307, 'slavery': 862, 'glen': 423, 'rigadoo': 573, 'met': 222, 'rosin': 140, 'gives': 2435, 'spirits': 2025, 'won': 772, 'springing': 1161, 'spire': 2344, 'never': 49, 'hares': 913, 'desperate': 1258, 'oconnell': 1479, 'mussels': 1408, 'village': 2342, 'transparently': 1937, 'hurroo': 1013, 'saint': 671, 'happened': 1847, 'closet': 2133, 'sweep': 2477, 'sweet': 58, 'confess': 2635, 'go': 50, 'australia': 1298, 'trace': 2098, 'dismay': 1597, 'rested': 1998, 'party': 1797, 'tarry': 2456, 'thought': 362, 'jolly': 889, 'west': 656, 'visions': 1228, 'fight': 1100, 'live': 249, 'enchanted': 2432, 'bould': 2216, 'show': 2622, 'yoke': 2257, 'getting': 2416, 'catch': 1905, 'dreamed': 1526, 'jig': 1836, 'health': 642, 'pie': 1720, 
'pig': 2359, 'were': 42, 'walked': 2276, 'whilst': 2299, 'belling': 2497, 'wrapped': 2175, 'english': 620, 'peace': 363, 'judy': 1806, 'blast': 1969, 'innocent': 1652, 'future': 1932, 'gold': 406, 'hades': 1314, 'bulging': 2410, 'stewards': 2514, 'valley': 432, 'always': 272, 'attend': 1223, '23rd': 2204, 'worn': 1429, 'strength': 2350, 'work': 929, 'fresh': 707, 'word': 984, 'wore': 1147, 'storied': 936, 'pluck': 2688, 'addle': 1449, 'outside': 1280, 'variety': 1608, 'fellow': 1716, 'boldly': 2424, 'mantle': 2533, 'kicked': 1875, 'deludhering': 2256, 'poured': 1596, 'caught': 2109, 'consolation': 2089, 'fingers': 1010, 'groves': 1496, 'began': 448, 'rain': 864, 'besides': 1957, 'nonsense': 1834, 'couple': 2087, 'happiness': 1341, 'fortune': 470, 'neighbors': 2212, 'enjoy': 1973, 'drain': 1439, 'longer': 430, 'harbour': 2297, 'views': 1493, 'regret': 2652, 'handsome': 404, 'frighten': 2398, 'egg': 1737, 'bold': 721, 'upwards': 2078, 'hoping': 2348, 'dare': 2621, 'dignity': 2187, 'sparkling': 1689, 'tiny': 1110, 'destiny': 1240, 'shut': 2357, 'dark': 1154, 'paint': 1782, 'comicality': 1657, 'buy': 784, 'strand': 2585, 'rebelled': 2295, 'm\xef\xbf\xbdr': 2177, 'but': 19, 'diemans': 624, 'ocean': 650, 'blarney': 931, 'sold': 749, 'art': 371, 'weirs': 1547, 'wheels': 1412, 'yours': 1162, 'are': 31, 'arm': 717, 'youre': 195, 'boyne': 940, 'blue': 264, 'fright': 2264, 'bairns': 1896, 'dadda': 2508, 'satisfaction': 1858, 'dancing': 353, 'daddy': 1025, 'hushed': 1517, 'put': 131, 'earth': 1330, 'bailiffs': 2502, 'east': 1675, 'wages': 2011, 'whos': 1133, 'um': 1746, 'clergy': 1662, 'cakes': 1814, 'lasses': 2425, 'stout': 750, 'brown': 444, 'us': 245, 'up': 86, 'much': 596, 'runaway': 867, 'gowns': 2119, 'hullabaloo': 1877, 'waking': 2183, 'twilight': 1315, 'puff': 1441, 'face': 326, 'elation': 2437, 'tough': 920, 'snare': 1362, 'faded': 1254, 'elevations': 1773, 'feels': 2128, 'gorey': 1506, 'castles': 1553, 'academy': 1841, 'parting': 703, 'travelians': 2289, 'notes': 
1136, 'ranting': 1587, 'horses': 1311, 'stalwart': 2068, 'agree': 1545, 'ditty': 1271, 'wheeled': 916, 'rather': 628, 'toe': 738, 'too': 110, 'called': 736, 'velvet': 639, 'me': 9, 'knitting': 2541, 'inheritance': 2190, 'bundle': 1083, 'teardrop': 2644, 'spleen': 2165, 'court': 1364, 'ended': 1320, 'well': 91, 'breathes': 1230, 'aged': 569, 'merry': 269, 'spreading': 2534, 'conchology': 1636, 'laughtcr': 2116, 'taking': 1313, 'fairer': 1071, 'lies': 454, 'feeling': 813, 'another': 324, 'tap': 2108, 'wid': 227, 'win': 1777, 'bridle': 1289, 'bravry': 2470, 'chara': 2548, 'fairy': 961, 'note': 1901, 'hurray': 2048, 'this': 70, 'vessel': 1246, 'blessing': 2347, 'leave': 207, 'hurrah': 974, 'bonnie': 398, 'iii': 1323, 'ever': 126, 'turn': 509, 'company': 1906, 'lashings': 1812, 'amid': 1242, 'places': 911, 'equality': 1656, 'even': 1029, 'nest': 701, 'next': 240, 'boundless': 1467, 'lifting': 1648, 'athy': 713, 'thyme': 184, 'emigrants': 1241, 'slower': 792, 'cut': 1985, 'queen': 1337, 'scarcely': 1702, 'sharp': 1539, 'grey': 649, 'share': 1688, 'flapping': 2551, 'firelights': 1310, 'queer': 1726, 'hed': 487, 'babys': 1427, 'meal': 1893, 'rude': 2150, 'rove': 2562, 'mb\xc3\xb3': 560, 'empty': 1120, 'powrfulest': 1617, 'her': 18, 'hes': 282, 'mcbryde': 1582, 'does': 1038, 'oftimes': 2528, 'moment': 833, 'broken': 617, 'marched': 1094, 'loud': 566, 'dance': 668, 'contented': 1692, 'lightly': 1202, 'purse': 2450, 'marches': 1090, 'awake': 943, 'fiddlers': 2113, 'foaming': 1788, 'lanigans': 305, 'beyond': 1704, 'side': 295, 'youll': 200, 'wasnt': 2023, 'cousin': 1142, 'locks': 1075, 'band': 327, 'hurley': 2684, 'gleam': 1709, 'carrigfergus': 904, 'bann': 939, 'devonshire': 1947, 'johnny': 156, 'sure': 149, 'you': 11, 'sorrowful': 1425, 'stable': 1290, 'bu': 2660, 'drubbing': 2366, 'maiden': 441, 'immortal': 590, 'bees': 1804, 'lough': 2278, 'tay': 1156, 'embarrass': 1956, 'mainsails': 1304, 'heavy': 1101, 'bubblin': 2009, 'bracken': 1888, 'floor': 764, 'chance': 2655, 
'rags': 2402, 'furious': 2073, 'parlour': 2267, 'moth': 1309, 'strong': 992, 'battleshield': 2186, 'hemp': 2062, 'add': 1939, 'jewel': 537, 'seas': 1256, 'doleful': 1722, 'day': 66, 'about': 2061, 'dad': 982, 'mourn': 1333, 'affection': 1972, 'soon': 252, 'racking': 2512, 'dai': 1286, 'modestly': 1281, 'knowing': 1967, 'mauser': 971, 'sun': 217, 'lane': 2341, 'struck': 417, 'merrily': 1200, 'prayers': 1264, 'cursing': 2499, 'pains': 1977, 'ask': 1042, 'galway': 1087, 'sometimes': 2138, 'regard': 2490, 'deny': 2275, 'drifting': 1530, 'waves': 820, 'robbing': 1763, 'clare': 1399, 'jumped': 2027, 'sooner': 1326, 'tired': 726, 'we': 46, 'ma': 710, 'ry': 1282, 'duram': 796, 'blooming': 361, 'style': 2007, 'suits': 2117, 'slave': 1931, 'whirligig': 1831, 'my': 7, 'ra': 213, 'lower': 793, 're': 1285, 'dime': 2100, 'hearted': 1079, 'phelim': 1871, 'salute': 1928, 'rebel': 1322, 'rarely': 2372, 'waken': 2393, 'pearse': 1578, 'blows': 2369, 'james': 1583, 'prettiest': 871, 'blight': 2155, 'twenty': 485, 'jailer': 2674, 'sully': 2469, 'bell': 588, 'sudden': 2313, 'turned': 689}
2690



# Expand every corpus line into its growing n-gram prefixes:
# tokens[:2], tokens[:3], ..., tokens[:len(tokens)].
input_sequences = []
for line in corpus:
    token_list = tokenizer.texts_to_sequences([line])[0]
    input_sequences.extend(
        token_list[:end] for end in range(2, len(token_list) + 1)
    )

# Left-pad every prefix with zeros so all rows share the longest prefix length.
max_sequence_len = max(map(len, input_sequences))
input_sequences = np.array(
    pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')
)

# The last column is the token to predict; everything before it is the input.
xs = input_sequences[:, :-1]
labels = input_sequences[:, -1]

# One-hot encode the target token ids over the whole vocabulary.
ys = tf.keras.utils.to_categorical(labels, num_classes=total_words)

In [5]:

# Show the token id of each word below, one per line
# (presumably the words of the corpus' opening line -- verify against `corpus`).
for word in ('in', 'the', 'town', 'of', 'athy', 'one', 'jeremy', 'lanigan'):
    print(tokenizer.word_index[word])


# Predictor row 6: a zero-padded n-gram prefix with its final token removed.
print(xs[6])



In [7]:

# One-hot label (row 6 of the to_categorical output) paired with xs[6].
print(ys[6])



In [8]:

# Predictor row 5 and the one-hot label that belongs to it.
print(xs[5])
print(ys[5])


In [9]:

# Dump the tokenizer's complete word -> index mapping (prints the whole dict).
print(tokenizer.word_index)

In [10]:

# Next-word prediction model: embedding -> bidirectional LSTM -> softmax over
# the vocabulary. Inputs are the padded prefixes in `xs` (length
# max_sequence_len - 1); targets are the one-hot rows in `ys`.
model = Sequential()
model.add(Embedding(total_words, 100, input_length=max_sequence_len-1))
model.add(Bidirectional(LSTM(150)))
model.add(Dense(total_words, activation='softmax'))

# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and rejected by newer Keras releases.
adam = Adam(learning_rate=0.01)
model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

# No validation data is passed to fit, so only training loss/accuracy are
# recorded in `history`.
history = model.fit(xs, ys, epochs=100, verbose=1)

# Print the layer-by-layer summary (print(model) would only show the object repr).
model.summary()


import matplotlib.pyplot as plt


def plot_graphs(history, string):
  """Plot one recorded training metric against the epoch number.

  Args:
    history: object exposing a `history` mapping of metric name -> sequence
      of per-epoch values (e.g. the value returned by `model.fit`).
    string: key of the metric to plot; also used as the y-axis label.
  """
  metric_values = history.history[string]
  plt.plot(metric_values)
  plt.ylabel(string)
  plt.xlabel("Epochs")
  plt.show()

In [12]:

# The accuracy metric is stored under 'accuracy' in TF 2.x and under 'acc' in
# older releases; use whichever key this run actually recorded.
accuracy_key = 'accuracy' if 'accuracy' in history.history else 'acc'
plot_graphs(history, accuracy_key)

In [16]:

seed_text = "I've got a bad feeling about this"
next_words = 100

# Greedy generation: repeatedly feed the text so far to the model and append
# the most probable next word.
for _ in range(next_words):
    token_list = tokenizer.texts_to_sequences([seed_text])[0]
    # Pad (or, once the text grows too long, truncate from the front) to the
    # input length the model was trained on.
    token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
    # Sequential.predict_classes was removed from Keras; taking the argmax of
    # the softmax output is the equivalent.
    probabilities = model.predict(token_list, verbose=0)
    predicted = int(np.argmax(probabilities, axis=-1)[0])
    # index_word is the tokenizer's reverse vocabulary; index 0 (padding) has
    # no entry and maps to an empty string, as the original scan did.
    output_word = tokenizer.index_word.get(predicted, "")
    seed_text += " " + output_word
print(seed_text)
