In [1]:
import random

import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense, Dropout, Input, LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

In [2]:
# Load the training corpus and lower-case it so the vocabulary does not
# contain separate entries for upper- and lower-case letters.
filename = "wonderland.txt"
# 'utf-8-sig' decodes exactly like 'utf-8' but drops a leading byte-order
# mark if the file has one; with plain 'utf-8' the BOM would survive as the
# character '\ufeff' and become a spurious vocabulary entry.
with open(filename, 'r', encoding='utf-8-sig') as file:
    raw_text = file.read().lower()

In [3]:
# Vocabulary: every distinct character of the corpus, in sorted order.
chars = sorted(set(raw_text))
# Two lookup tables built from the same ordering: character -> index and
# index -> character.
char_to_int = dict(zip(chars, range(len(chars))))
int_to_char = dict(enumerate(chars))

In [4]:
# Corpus length (in characters) and vocabulary size.
n_chars, n_vocab = len(raw_text), len(chars)
# Number of characters in each input window.
seq_length = 100
# Containers for the integer-encoded input windows and their next-character targets.
dataX, dataY = [], []

In [5]:
# Build (input window, next character) training pairs by sliding a window of
# seq_length characters over the corpus one character at a time.
#
# The corpus is integer-encoded once up front; each window is then a slice of
# that list, instead of re-encoding every character seq_length times.
encoded_text = [char_to_int[char] for char in raw_text]

# dataX / dataY are rebuilt from scratch here rather than appended to, so
# re-running this cell cannot silently duplicate the patterns.
dataX = [encoded_text[start:start + seq_length]
         for start in range(n_chars - seq_length)]
# The target for the window starting at `start` is the character at
# start + seq_length, i.e. everything from position seq_length onwards.
dataY = encoded_text[seq_length:]

n_patterns = len(dataX)
print("Total Patterns:", n_patterns)

Total Patterns: 163816


In [6]:
# Shape the integer windows as (n_patterns, seq_length, 1) and scale the
# indices by the vocabulary size.
X = np.reshape(dataX, (n_patterns, seq_length, 1)) / float(n_vocab)
# One-hot encode the targets with an explicit width. Without num_classes the
# width is inferred from the largest index present in dataY, so a character
# that never occurs as a target (e.g. one that only appears in the first
# seq_length characters) would make y narrower than the vocabulary.
y = to_categorical(dataY, num_classes=n_vocab)
# NOTE(review): the windows overlap by seq_length - 1 characters, so this
# shuffled split puts near-duplicates of training windows into the validation
# set; validation metrics are likely optimistic.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1, random_state=42)

In [7]:
# Single-layer LSTM that reads one window of scaled character indices and
# predicts a probability distribution over the next character.
model = Sequential([
    # Declare the input shape with an explicit Input layer; passing
    # input_shape= to the first layer is the deprecated form that triggers a
    # UserWarning in current Keras.
    Input(shape=(X.shape[1], X.shape[2])),
    LSTM(256),
    Dropout(0.2),
    # One output unit per column of the one-hot target matrix.
    Dense(y.shape[1], activation='softmax')
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

  super().__init__(**kwargs)


In [8]:
# Train for a single pass over the training split, reporting loss/accuracy on
# the held-out split at the end of the epoch.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=128,
    epochs=1,
    validation_data=(X_val, y_val),
)

[1m1152/1152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m689s[0m 594ms/step - accuracy: 0.1640 - loss: 3.1251 - val_accuracy: 0.2151 - val_loss: 2.9042


<keras.src.callbacks.history.History at 0x23585d83190>

In [None]:
# Score the trained model on the validation split; evaluate() returns the loss
# followed by the metrics configured in compile() (here: accuracy).
validation_scores = model.evaluate(x=X_val, y=y_val)
loss, accuracy = validation_scores
print(f"Validation Loss: {loss:.4f}")
print(f"Validation Accuracy: {accuracy:.4f}")

In [9]:
def generate_text(model, char_to_int, int_to_char, seed_text, length):
    """Greedily extend ``seed_text`` by ``length`` characters using ``model``.

    At each step the current window of character indices is scaled by the
    vocabulary size, passed to ``model.predict``, and the highest-scoring
    index is decoded and appended. The window then slides forward by one
    character, so its length always equals ``len(seed_text)``.

    Args:
        model: Object exposing ``predict(x, verbose=0)`` that accepts an array
            of shape ``(1, len(seed_text), 1)`` and returns per-class scores.
        char_to_int: Mapping from character to integer index. Its size is
            used as the scaling divisor, so it should be the same mapping
            (and therefore the same vocabulary size) used to scale the
            training inputs.
        int_to_char: Mapping from integer index back to character.
        seed_text: Starting text; every character must be a key of
            ``char_to_int``.
        length: Number of characters to generate. Values <= 0 generate nothing.

    Returns:
        ``seed_text`` followed by the generated characters.

    Raises:
        KeyError: If ``seed_text`` contains a character missing from
            ``char_to_int``.
        ValueError: If ``seed_text`` is empty while ``length`` is positive,
            since there would be no window to feed the model.
    """
    # Validate the seed up front so the caller gets a message naming the
    # offending characters instead of a bare KeyError on one of them.
    unknown = sorted(set(seed_text) - set(char_to_int))
    if unknown:
        raise KeyError(
            f"seed_text contains characters outside the vocabulary: {unknown!r}"
        )

    if length <= 0:
        return seed_text
    if not seed_text:
        raise ValueError("seed_text must contain at least one character")

    # Derive the scaling divisor from the mapping that was passed in rather
    # than from a notebook-level global, so the function is self-contained.
    vocab_size = float(len(char_to_int))

    pattern = [char_to_int[char] for char in seed_text]
    generated = []
    for _ in range(length):
        x = np.reshape(pattern, (1, len(pattern), 1)) / vocab_size
        prediction = model.predict(x, verbose=0)
        # Greedy decoding: always take the single most likely next character.
        index = int(np.argmax(prediction))
        generated.append(int_to_char[index])
        # Slide the window: drop the oldest index, append the new one.
        pattern = pattern[1:] + [index]

    return seed_text + ''.join(generated)

In [12]:
# Demo: extend a short seed by ten characters and show both the seed and the
# full generated string.
seed_text = "hey everyone "
generated_text = generate_text(
    model,
    char_to_int,
    int_to_char,
    seed_text,
    length=10,
)

# A single print with a newline separator emits the same four lines as four
# separate print calls would.
print("Seed text:", seed_text, "\nGenerated text:", generated_text, sep="\n")

Seed text:
hey everyone 

Generated text:
hey everyone to the toe
