In [None]:
import tensorflow as tf
import numpy as np
import random
import sys
import matplotlib.pyplot as plt

# Load dataset: the "tinyshakespeare" corpus from the char-rnn repository.
# get_file downloads the URL and returns the local path of the file.
path = tf.keras.utils.get_file(
    "shakespeare.txt",
    "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt",
)
# Read through a context manager so the file handle is closed as soon as the
# corpus is in memory instead of being left to the garbage collector.
with open(path, "r", encoding="utf-8") as corpus_file:
    # The whole corpus is lowercased; everything downstream works on this text.
    text = corpus_file.read().lower()
print("Corpus length:", len(text))

# Build the character vocabulary and the two lookup tables.
# `chars` holds every distinct character of the corpus in sorted order, so a
# character's index is simply its position in that list.
chars = sorted(set(text))
# character -> integer index
char_to_idx = dict(zip(chars, range(len(chars))))
# integer index -> character (inverse of char_to_idx)
idx_to_char = dict(enumerate(chars))

# Cut the corpus into overlapping fixed-length windows. Each window is paired
# with the single character that immediately follows it (the training target).
seq_length = 40  # number of characters in each input window
step = 3  # distance between the starts of consecutive windows
_starts = range(0, len(text) - seq_length, step)
sequences = [text[start : start + seq_length] for start in _starts]
next_chars = [text[start + seq_length] for start in _starts]

print("Total sequences:", len(sequences))

# One-hot encode the windows and their targets.
#   x: (number of sequences, seq_length, vocabulary size)
#   y: (number of sequences, vocabulary size)
# The builtin `bool` is used as the dtype: the `np.bool` alias was removed in
# NumPy 1.24 and only came back in 2.0, so `dtype=np.bool` raises
# AttributeError on NumPy 1.24-1.26. `bool` behaves the same on every version.
x = np.zeros((len(sequences), seq_length, len(chars)), dtype=bool)
y = np.zeros((len(sequences), len(chars)), dtype=bool)

for i, (seq, target_char) in enumerate(zip(sequences, next_chars)):
    for t, char in enumerate(seq):
        # Mark the vocabulary slot of the character at timestep t.
        x[i, t, char_to_idx[char]] = True
    # Mark the vocabulary slot of the character that follows the window.
    y[i, char_to_idx[target_char]] = True

# Define the LSTM model: one 128-unit LSTM layer over the one-hot window,
# followed by a softmax over the vocabulary for the next character.
# The input shape is declared with an explicit Input object rather than the
# `input_shape=` layer argument, which Keras 3 warns against for Sequential
# models ("prefer using an Input(shape) object as the first layer").
model = tf.keras.models.Sequential([
    tf.keras.Input(shape=(seq_length, len(chars))),
    tf.keras.layers.LSTM(128),
    tf.keras.layers.Dense(len(chars), activation="softmax"),
])

# Targets are one-hot vectors, hence categorical (not sparse) cross-entropy.
model.compile(loss="categorical_crossentropy", optimizer="adam")

# Train the model; `history` keeps the per-epoch loss values returned by fit.
history = model.fit(x, y, batch_size=128, epochs=20)

# Function to sample the next character using temperature scaling
def sample(preds, temperature=1.0):
    """Draw a random index from ``preds`` after temperature scaling.

    The probabilities are re-weighted as ``softmax(log(preds) / temperature)``:
    temperatures below 1 sharpen the distribution towards the most likely
    entries, temperatures above 1 flatten it.

    Args:
        preds: 1-D array-like of probabilities (e.g. a softmax output).
        temperature: Scaling factor. ``0`` selects the most probable index
            deterministically (the limit of the scaling as it approaches 0).

    Returns:
        The sampled index as a Python ``int`` in ``range(len(preds))``.
    """
    preds = np.asarray(preds).astype("float64")

    # Dividing by a zero temperature is undefined; use its limit (greedy pick).
    if temperature == 0:
        return int(np.argmax(preds))

    # log(0) is -inf, which is the correct logit for an impossible entry, so
    # only the divide-by-zero warning is silenced here.
    with np.errstate(divide="ignore"):
        logits = np.log(preds) / temperature

    # Shift so the largest logit is 0. Softmax is invariant to this shift, and
    # it stops every exp() from underflowing to 0 at very small temperatures,
    # which would otherwise produce NaN probabilities.
    logits = logits - np.max(logits)
    exp_preds = np.exp(logits)
    probs = exp_preds / np.sum(exp_preds)

    # Size the draw from the distribution itself rather than a global vocabulary.
    return int(np.random.choice(len(probs), p=probs))

# Generate text using the trained model
def generate_text(seed_text, length=400, temperature=0.5):
    """Generate ``length`` characters that continue ``seed_text``.

    At each step the most recent characters (at most ``seq_length``) are
    one-hot encoded, passed to the trained model, and the next character is
    drawn from the prediction with ``sample``.

    Args:
        seed_text: Text to continue. It may be shorter or longer than
            ``seq_length``; only its last ``seq_length`` characters are fed to
            the model. It is lowercased for encoding because the vocabulary is
            built from the lowercased corpus.
        length: Number of characters to generate.
        temperature: Sampling temperature forwarded to ``sample``.

    Returns:
        ``seed_text`` (unchanged) followed by the generated characters.

    Raises:
        KeyError: If the lowercased seed contains a character that is not in
            the vocabulary.
    """
    # Keep only the most recent seq_length characters: a longer seed would
    # otherwise index past the end of the input tensor.
    window = seed_text.lower()[-seq_length:]
    vocab_size = len(chars)
    pieces = [seed_text]

    for _ in range(length):
        x_pred = np.zeros((1, seq_length, vocab_size))
        # Right-align a short window so the real characters occupy the final
        # timesteps; the leading timesteps stay all-zero until the window has
        # grown to seq_length. Training windows are always full length.
        offset = seq_length - len(window)
        for t, char in enumerate(window):
            x_pred[0, offset + t, char_to_idx[char]] = 1

        preds = model.predict(x_pred, verbose=0)[0]
        next_char = idx_to_char[sample(preds, temperature)]

        pieces.append(next_char)
        # Slide the window: append the new character and drop the oldest ones
        # beyond seq_length (a short window grows until it is full).
        window = (window + next_char)[-seq_length:]

    # Join once at the end instead of repeatedly concatenating strings.
    return "".join(pieces)

# Example generation: continue a short prompt for 300 characters using a
# fairly conservative sampling temperature.
seed_text = "the king was standing alone in the "
print(generate_text(seed_text, 300, temperature=0.5))

Corpus length: 1115394
Total sequences: 371785


  super().__init__(**kwargs)


Epoch 1/20
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m172s[0m 59ms/step - loss: 2.5799
Epoch 2/20
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m204s[0m 60ms/step - loss: 1.9933
Epoch 3/20
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 59ms/step - loss: 1.8289
Epoch 4/20
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 59ms/step - loss: 1.7363
Epoch 5/20
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m213s[0m 63ms/step - loss: 1.6610
Epoch 6/20
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m177s[0m 61ms/step - loss: 1.6168
Epoch 7/20
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m197s[0m 59ms/step - loss: 1.5702
Epoch 8/20
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m171s[0m 59ms/step - loss: 1.5370
Epoch 9/20
[1m2905/2905[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 58ms/step - loss: 1.5120
Epoch 10/20
[1m2905/2905[0m [32m━━━━━━━━━━━