In [1]:
import numpy as np
import io
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split

In [2]:
# Load and preprocess data
data_URL = 'shakespeare_train.txt'
val_URL = 'shakespeare_valid.txt'

with io.open(data_URL, 'r', encoding='utf-8') as f:
    text = f.read()

with io.open(val_URL, 'r', encoding='utf-8') as f:
    val_text = f.read()

# Character vocabulary of the training text; a character's id is its position
# in the sorted list of distinct characters.
vocab = sorted(set(text))
vocab_to_int = {c: i for i, c in enumerate(vocab)}
int_to_vocab = np.array(vocab)

# The validation text has to be encoded with the SAME character -> id table as
# the training text. Building a second table from val_text (as was done before)
# assigns different ids to the same character whenever the two character sets
# differ, which makes the encoded validation data meaningless to the model.
unseen_chars = sorted(set(val_text) - set(vocab))
if unseen_chars:
    raise ValueError(
        "Validation text contains characters that are missing from the "
        "training vocabulary: %r" % (unseen_chars,)
    )

# The val_* names are kept so existing references keep working. val_vocab is
# still the sorted set of characters present in the validation text; the two
# lookup tables are now aliases of the shared training tables.
val_vocab = sorted(set(val_text))
val_vocab_to_int = vocab_to_int
val_int_to_vocab = int_to_vocab

train_data = np.array([vocab_to_int[c] for c in text], dtype=np.int32)
val_data = np.array([vocab_to_int[c] for c in val_text], dtype=np.int32)

char_dataset = tf.data.Dataset.from_tensor_slices(train_data)
# val_dataset = tf.data.Dataset.from_tensor_slices(val_data)

# Each chunk holds seq_length + 1 ids so it can later be split into an input
# and a target that are shifted by one position; a shorter trailing chunk is
# dropped.
seq_length = 100
sequences = char_dataset.batch(seq_length+1, drop_remainder=True)

In [3]:
def split_input_target(chunk):
    """Split one chunk into a next-element prediction pair.

    Args:
        chunk: Any sliceable sequence (list, string, array, tensor).

    Returns:
        A tuple ``(input_text, target_text)`` where ``input_text`` is the
        chunk without its last element and ``target_text`` is the chunk
        without its first element, i.e. the same sequence shifted by one.
    """
    return chunk[:-1], chunk[1:]


# Debug aid (disabled): print the raw character ids of the first chunk.
# for seq in sequences.take(1):
#     print(seq.numpy())

# Turn every chunk into an (input, target) pair via split_input_target: the
# target is the input shifted by one position.
dataset = sequences.map(split_input_target)
# Debug aid (disabled): print the first two (input, target) pairs.
# for input_text, target_text in dataset.take(2):
#     print("Input Text:")
#     print(input_text.numpy())
#     print("Target Text:")
#     print(target_text.numpy())
#     print()

In [4]:
BATCH_SIZE = 64
BUFFER_SIZE = 10000
TRAIN_FRACTION = 0.8  # share of the batches used for training; the rest is validation

# Shuffle the (input, target) pairs ONCE and freeze that order.
# Dataset.shuffle() reshuffles on every iteration by default; with take()/skip()
# applied on top, that would draw a different train/validation partition each
# epoch, so validation batches would leak into training.
# NOTE: this cell rebinds `dataset`; re-run the cell that creates `dataset`
# before re-running this one, otherwise the data gets batched twice.
dataset = dataset.shuffle(BUFFER_SIZE, reshuffle_each_iteration=False).batch(BATCH_SIZE, drop_remainder=True)

# Calculate the number of batches for training
total_batches = len(dataset)
train_batches = int(TRAIN_FRACTION * total_batches)

# Split the dataset into disjoint training and validation datasets. Only the
# training slice is reshuffled (at batch level) on every epoch; the validation
# slice stays fixed.
train_dataset = dataset.take(train_batches).shuffle(max(train_batches, 1))
val_dataset = dataset.skip(train_batches)

vocab_size = len(vocab)
embedding_dim = 256
rnn_units = 1024

def build_model_rnn(vocab_size, embedding_dim, rnn_units, batch_size):
    """Build a stateful Embedding -> GRU -> Dense character model.

    Args:
        vocab_size: Number of distinct ids; used as the embedding input size
            and as the width of the final Dense layer.
        embedding_dim: Size of each embedding vector.
        rnn_units: Number of GRU units.
        batch_size: Fixed batch size baked into the input shape (the GRU is
            created with ``stateful=True``).

    Returns:
        An uncompiled ``tf.keras.Sequential`` model whose Dense layer has no
        activation, i.e. it outputs one raw score per vocabulary entry for
        every time step.
    """
    embedding = tf.keras.layers.Embedding(
        vocab_size,
        embedding_dim,
        batch_input_shape=[batch_size, None],
    )
    recurrent = tf.keras.layers.GRU(
        rnn_units,
        return_sequences=True,
        stateful=True,
        recurrent_initializer='glorot_uniform',
    )
    projection = tf.keras.layers.Dense(vocab_size)

    return tf.keras.Sequential([embedding, recurrent, projection])

def build_model_lstm(vocab_size, embedding_dim, rnn_units, batch_size):
    """Build a stateful Embedding -> LSTM -> Dense character model.

    Args:
        vocab_size: Number of distinct ids; used as the embedding input size
            and as the width of the final Dense layer.
        embedding_dim: Size of each embedding vector.
        rnn_units: Number of LSTM units.
        batch_size: Fixed batch size baked into the input shape (the LSTM is
            created with ``stateful=True``).

    Returns:
        An uncompiled ``tf.keras.models.Sequential`` model whose Dense layer
        has no activation, i.e. it outputs one raw score per vocabulary entry
        for every time step.
    """
    layer_stack = [
        tf.keras.layers.Embedding(
            input_dim=vocab_size,
            output_dim=embedding_dim,
            batch_input_shape=[batch_size, None],
        ),
        tf.keras.layers.LSTM(
            units=rnn_units,
            return_sequences=True,
            stateful=True,
            recurrent_initializer=tf.keras.initializers.GlorotNormal(),
        ),
        tf.keras.layers.Dense(vocab_size),
    ]
    return tf.keras.models.Sequential(layer_stack)

# Instantiate the GRU variant with the training batch size.
model = build_model_rnn(
    vocab_size=len(vocab),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units,
    batch_size=BATCH_SIZE,
)

# Smoke test: push one batch through the model and check the output shape.
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

model.summary()

(64, 100, 67) # (batch_size, sequence_length, vocab_size)
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (64, None, 256)           17152     
                                                                 
 gru (GRU)                   (64, None, 1024)          3938304   
                                                                 
 dense (Dense)               (64, None, 67)            68675     
                                                                 
Total params: 4,024,131
Trainable params: 4,024,131
Non-trainable params: 0
_________________________________________________________________


In [5]:
# Draw one character id per time step from the scores predicted for the first
# sequence of the example batch, then drop the trailing sample axis.
sampled_indices = tf.squeeze(
    tf.random.categorical(example_batch_predictions[0], num_samples=1),
    axis=-1,
).numpy()

# Decode both the input ids and the sampled ids back to text for comparison.
print("Input: \n", repr("".join(int_to_vocab[input_example_batch[0]])))
print()
print("Next Char Predictions: \n", repr("".join(int_to_vocab[sampled_indices])))

Input: 
 "is foot were equal with his eye,\nAnd chides the sea that sunders him from thence,\nSaying, he'll lade"

Next Char Predictions: 
 "G3!YNwCS'bc'O :tt&$nSfSyKU!ljwtgxTjO,ypK!UIOK\n!H3FgZOX qYh]st\nbWh!cKvZPKFqzYuZzDTM[Na -YuzqPWX.YNO;-"


In [7]:
import os
def loss(labels, logits):
    """Sparse categorical cross-entropy computed on raw (unnormalized) scores.

    Args:
        labels: Integer class ids.
        logits: Raw model outputs; ``from_logits=True`` tells Keras that no
            softmax has been applied to them.

    Returns:
        The value returned by
        ``tf.keras.losses.sparse_categorical_crossentropy`` for these inputs.
    """
    return tf.keras.losses.sparse_categorical_crossentropy(
        y_true=labels,
        y_pred=logits,
        from_logits=True,
    )

# Sanity check: loss of the current model on the example batch.
example_batch_loss = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("scalar_loss:      ", example_batch_loss.numpy().mean())

# Track per-character accuracy under the name 'accuracy' so that
# history.history contains the 'accuracy' and 'val_accuracy' series in
# addition to 'loss' and 'val_loss'. Without a compiled metric only the loss
# series are recorded. The labels are integer ids, hence the sparse variant.
model.compile(
    optimizer='adam',
    loss=loss,
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy')],
)

# Directory where the checkpoints will be saved
checkpoint_dir = 'checkpoints'
# Name of the checkpoint files ({epoch} is filled in by ModelCheckpoint)
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# Save the weights only (not the full model).
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

EPOCHS = 5
history = model.fit(train_dataset, validation_data=val_dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Prediction shape:  (64, 100, 67)  # (batch_size, sequence_length, vocab_size)
scalar_loss:       4.2055025


ValueError: `validation_split` is only supported for Tensors or NumPy arrays, found following types in the input: [<class 'tensorflow.python.data.ops.dataset_ops.BatchDataset'>]

In [28]:
checkpoint_file = 'checkpoints/ckpt_1'

# Rebuild the network with batch_size=1 so that a single prompt can be fed at
# a time. The builder must be build_model_rnn: no function named `build_model`
# is defined in this notebook, and the weights being loaded have to match the
# architecture they were saved from.
model = build_model_rnn(vocab_size, embedding_dim, rnn_units, batch_size=1)

# model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
model.load_weights(checkpoint_file)

model.build(tf.TensorShape([1, None]))

model.summary()

Model: "sequential_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_11 (Embedding)    (1, None, 256)            17152     
                                                                 
 gru_11 (GRU)                (1, None, 1024)           3938304   
                                                                 
 dense_11 (Dense)            (1, None, 67)             68675     
                                                                 
Total params: 4,024,131
Trainable params: 4,024,131
Non-trainable params: 0
_________________________________________________________________


In [29]:
def generate_text(model, start_string, num_generate=1000, temperature=1.0):
    """Generate text character by character, starting from a prompt.

    The prompt is encoded with the notebook-level ``vocab_to_int`` table and
    fed to the model; one character id is then sampled from the scores of the
    last time step, decoded with ``int_to_vocab`` and fed back as the next
    input.

    Args:
        model: Model called as ``model(ids)`` on a ``(1, length)`` id tensor;
            it must provide ``reset_states()``.
        start_string: Non-empty prompt; every character must be a key of
            ``vocab_to_int``.
        num_generate: Number of characters to sample (default 1000).
        temperature: Positive divisor applied to the scores before sampling
            (default 1.0, which leaves the scores unchanged).

    Returns:
        ``start_string`` followed by the generated characters.

    Raises:
        ValueError: If ``start_string`` is empty or ``temperature`` is not
            positive.
        KeyError: If ``start_string`` contains a character that is not in
            ``vocab_to_int``.
    """
    # Validate up front: an empty prompt gives the model nothing to predict
    # from, and a non-positive temperature would produce invalid scores.
    if not start_string:
        raise ValueError("start_string must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    unknown_chars = sorted(set(start_string) - set(vocab_to_int))
    if unknown_chars:
        raise KeyError(
            "start_string contains characters missing from the vocabulary: %r"
            % (unknown_chars,)
        )

    # Encode the prompt and add a leading batch axis.
    input_eval = tf.expand_dims([vocab_to_int[s] for s in start_string], 0)

    text_generated = []

    # Start from a clean recurrent state for every generation run.
    model.reset_states()
    for _ in range(num_generate):
        predictions = model(input_eval)
        # Drop the batch axis.
        predictions = tf.squeeze(predictions, 0)
        predictions = predictions / temperature
        # Sample for every time step but keep only the last one.
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

        # Only the newly sampled id is fed back as the next input.
        input_eval = tf.expand_dims([predicted_id], 0)

        text_generated.append(int_to_vocab[predicted_id])

    return start_string + ''.join(text_generated)

In [30]:
# Print a sample that starts with the prompt "Juliet". generate_text draws
# characters with tf.random.categorical and no seed is set in the visible
# cells, so the printed text is expected to differ between runs.
print(generate_text(model, start_string=u"Juliet"))

Juliet my wif;
When? my lord, and have watt flord twat mother, may
If what be us his was flatter your droagn
my his man: and ' think thou aw a placy to you:
'Tis stare, a noble vared on the clain, when I haved plysubital mine.

KING HENRY IV:
Nay, sail,
therefore I mone, Fortunan; I till grod all insmember'd.

FERS:
My rony, mind timely feithes wret
Meet her frow fow, where's as may chasticions, belle!

KING HENRBINM:
Becover hel' sens the foin compansage us.

ANTIPHOLUS OF YORLY:
It hat bloody, what man he like a sair; they as erse you hoode such down slainst the valourby.

FALSTAFF:
Desires yeal in time: I cannot gone.

IARO:
O, come, good mercy, non strong bening is with vice, all you highs the thtrows
That is some first Kind a premat liou, O near, all we will not so look abone
not with thereine eetress might.

AlM:
He shall I, an your awn'd in all a short
Or bott. Sur your grief,
And, fout kies? Say you, soldieller's heart it be
Take all that this
heavents; but with the hotes.
You 

In [None]:
import matplotlib.pyplot as plt

# Assuming 'history' is the result of model.fit()
training_log = history.history

# Plot training and validation loss side by side
fig, (ax_train, ax_val) = plt.subplots(1, 2, figsize=(12, 4))

# Plot Training Loss
ax_train.plot(training_log['loss'], label='Training Loss')
ax_train.set(title='Training Loss', xlabel='Epoch', ylabel='Loss')
ax_train.legend()

# Plot Validation Loss
ax_val.plot(training_log['val_loss'], label='Validation Loss', color='orange')
ax_val.set(title='Validation Loss', xlabel='Epoch', ylabel='Loss')
ax_val.legend()

fig.tight_layout()
plt.show()

# Plot learning curve (accuracy).
# The 'accuracy' / 'val_accuracy' series only exist when the model was compiled
# with an accuracy metric; reading them unconditionally raises KeyError
# otherwise, so the figure is skipped with a hint instead.
if 'accuracy' in training_log and 'val_accuracy' in training_log:
    fig, ax_acc = plt.subplots(figsize=(8, 6))

    ax_acc.plot(training_log['accuracy'], label='Training Accuracy')
    ax_acc.plot(training_log['val_accuracy'], label='Validation Accuracy', color='orange')

    ax_acc.set(title='Training and Validation Accuracy', xlabel='Epoch', ylabel='Accuracy')
    ax_acc.legend()
    plt.show()
else:
    print("No accuracy metric was recorded during training; "
          "compile the model with an accuracy metric to plot it.")
