## GPU-Accelerated Alice in Wonderland Text Generation

This notebook uses LSTM neural networks with CUDA GPU acceleration to generate text in the style of "Alice in Wonderland". 

**Important:** Restart the kernel and run cells 1-2 first to verify GPU detection!

In [1]:
import numpy as np
import pandas as pd
import torch
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import RNN

from keras.callbacks import ModelCheckpoint

# Check GPU availability
print(f"PyTorch CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU Device: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

# TensorFlow CPU mode (Windows CUDA setup is complex, PyTorch is preferred)
print("\nNote: TensorFlow running in CPU mode on Windows.")
print("For GPU with TensorFlow on Windows, download CUDA Toolkit from NVIDIA.")
print("PyTorch is already GPU-enabled on this system!")

PyTorch CUDA available: True
GPU Device: NVIDIA GeForce RTX 3050 Laptop GPU
GPU Memory: 4.29 GB

Note: TensorFlow running in CPU mode on Windows.
For GPU with TensorFlow on Windows, download CUDA Toolkit from NVIDIA.
PyTorch is already GPU-enabled on this system!


In [2]:
# Verify GPU availability for both PyTorch and TensorFlow
import torch
print(f"PyTorch CUDA available: {torch.cuda.is_available()}")
print(f"TensorFlow GPU available: {len(tf.config.list_physical_devices('GPU')) > 0}")
if len(tf.config.list_physical_devices('GPU')) > 0:
    print(f"TensorFlow will use: {tf.config.list_physical_devices('GPU')}")


PyTorch CUDA available: True
TensorFlow GPU available: False


In [3]:
from keras.utils import to_categorical

In [4]:
#loading the book
text = (open("alice_in_wonderland.txt").read())
text = text.lower()
#here we are converiing the text into its lower form as in changing the capitals to lower case


In [5]:
#creating character mappig
"""here we are using character mapping to assign an number to a character or a word 
basic idea: here we are mapping a catagorical value to a numberical value 
why? becauese maching understand numbers beter than the characters
this is also the first step in nlp tasks and also very pivotal in an rnn project
"""

#1st we we define characters as a list 
characters = sorted(list(set(text)))
#here characters is a list of all the unique characters in the text

num_to_char = {n:char for n, char in enumerate(characters)}
#here num_to_char is a dictionary the maps the characters to numbers    

char_to_num = {char: n for n, char in enumerate(characters)}
#here char_to_num is a dictionary that maps numbers to characters reverse mapping ig

#defining the vocabulary size
#vocalb_size = len(characters)

#print(f"print the number of unique characters: {vocalb_size}")
#print(f"the unique characters are: {characters}")

#we will remov * and \n from the characters as they are not useful in our model training
characters.remove('*')
characters.remove('\n')
vocalb_size = len(characters)
print(f"the new vocabulary size is: {vocalb_size}")
print(f"the unique characters are: {characters}")



the new vocabulary size is: 44
the unique characters are: [' ', '!', '"', "'", '(', ')', ',', '-', '.', '0', '3', ':', ';', '?', '[', ']', '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


In [6]:
#shape fixing foor training the model
x = []
y = []

#creating the sequences and the labels
#here length is the total length of the text
#seq_length is the length of each sequence

length = len(text)
seq_length = 100

##looping through the text to create sequences and labels
for i in range(0, length - seq_length, 1):
    #here we are creating sequences of length seq_length thus i + seq_length
    sequence = text[i:i + seq_length]
    #here text[i:i + seq_length] gives us the sequence from position i to i + seq_length

    #here we are creating the label for each sequence so we take text at position of the sequence and tag it as the label
    label = text[i + seq_length] 

    #now we are appending the sequences and labels to the x and y lists after converting them to their numerical form using the char_to_num dictionary   
    x.append([char_to_num[char] for char in sequence])
    y.append(char_to_num[label])

#priniting the length of the input sequences
print(f"the length of the input sequences is: {len(x)}")


the length of the input sequences is: 148474


In [7]:
#now modifiying them so that they can be used to train the model
x_mod = np.reshape(x, (len(x), seq_length, 1))
#here we are reshaping the x to be of the shape (number of sequences, sequence length, 1)   
x_mod = x_mod / float(vocalb_size)
#here we are normalizing the x_mod by dividing it by the vocabulary size
y_modified = to_categorical(y)
#here we are converting the y to categorical form using one hot encoding


In [8]:
#main training the model - PyTorch LSTM
import torch
import torch.nn as nn
import torch.optim as optim

# Set device to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Convert data to PyTorch tensors
x_tensor = torch.FloatTensor(x_mod).to(device)
y_tensor = torch.LongTensor(np.argmax(y_modified, axis=1)).to(device)

# Define PyTorch LSTM model
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=0.2)
        self.fc = nn.Linear(hidden_size, num_classes)
    
    def forward(self, x):
        lstm_out, _ = self.lstm(x)
        # Take the last output for classification
        last_out = lstm_out[:, -1, :]
        output = self.fc(last_out)
        return output

# Initialize model
model = LSTMModel(input_size=1, hidden_size=700, num_layers=3, num_classes=y_modified.shape[1]).to(device)
print(f"Model created on GPU: {next(model.parameters()).is_cuda}")
print(f"Model architecture:\n{model}")


Using device: cuda
Model created on GPU: True
Model architecture:
LSTMModel(
  (lstm): LSTM(1, 700, num_layers=3, batch_first=True, dropout=0.2)
  (fc): Linear(in_features=700, out_features=46, bias=True)
)


In [None]:
#Setup training parameters
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0005)

# Store losses for monitoring
train_losses = []


In [None]:
#training the model on GPU
import time
epochs = 20
batch_size = 100

# Create data loader
from torch.utils.data import TensorDataset, DataLoader
dataset = TensorDataset(x_tensor, y_tensor)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Set model to training mode
model.train()

# Training loop
print("Starting training on GPU...\n")
print("=" * 60)
print(f"{'Epoch':<8} {'Loss':<12} {'Batch Progress':<20} {'Time':<10}")
print("=" * 60)

start_time = time.time()
for epoch in range(epochs):
    epoch_loss = 0.0
    epoch_start = time.time()
    
    for batch_idx, (batch_x, batch_y) in enumerate(dataloader):
        # Forward pass
        outputs = model(batch_x)
        loss = loss_fn(outputs, batch_y)
        
        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        # Clip gradients to prevent explosion
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        
        epoch_loss += loss.item()
        
        # Show progress bar
        progress = (batch_idx + 1) / len(dataloader)
        bar_length = 15
        filled = int(bar_length * progress)
        bar = '█' * filled + '░' * (bar_length - filled)
        
        if (batch_idx + 1) % max(1, len(dataloader) // 5) == 0:
            print(f"{epoch+1:<8} {epoch_loss/(batch_idx+1):<12.4f} {bar} {progress*100:>5.1f}%", end='\r')
    
    avg_loss = epoch_loss / len(dataloader)
    train_losses.append(avg_loss)
    epoch_time = time.time() - epoch_start
    
    print(f"{epoch+1:<8} {avg_loss:<12.4f} {'█' * 15} 100.0%  {epoch_time:>6.2f}s")

total_time = time.time() - start_time
print("=" * 60)


print(f"Training complete! Total time: {total_time/60:.2f} minutes")print(f"Final loss: {avg_loss:.4f}")

Starting training on GPU...

Epoch    Loss         Batch Progress       Time      
1        0.3710       ███████████████ 100.0%  204.81s
2        0.3497       ███████████████ 100.0%  205.27s
3        0.3314       ███████████████ 100.0%  205.77s
4        0.3128       ███████████████ 100.0%  206.06s
5        0.3070       ███████████████ 100.0%  204.25s
6        0.2931       ███████████████ 100.0%  204.09s
7        0.2868       ███████████████ 100.0%  200.36s
8        0.2742       ███████████████ 100.0%  204.40s
9        0.2717       ███████████████ 100.0%  203.83s
10       0.8698       ███████████████ 100.0%  205.05s
11       2.9746       █████████░░░░░░  60.0%

In [None]:
# Continue training for more epochs
model.train()  # Ensure model is in training mode

additional_epochs = 30
print(f"Continuing training for {additional_epochs} more epochs...")
print("=" * 60)

for epoch in range(additional_epochs):
    epoch_loss = 0.0
    epoch_start = time.time()
    
    for batch_idx, (batch_x, batch_y) in enumerate(dataloader):
        outputs = model(batch_x)
        loss = loss_fn(outputs, batch_y)
        
        optimizer.zero_grad()
        loss.backward()
        # Clip gradients to prevent explosion
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        
        epoch_loss += loss.item()
        
        progress = (batch_idx + 1) / len(dataloader)
        bar_length = 15
        filled = int(bar_length * progress)
        bar = '█' * filled + '░' * (bar_length - filled)
        
        if (batch_idx + 1) % max(1, len(dataloader) // 5) == 0:
            print(f"{epoch+1:<8} {epoch_loss/(batch_idx+1):<12.4f} {bar} {progress*100:>5.1f}%", end='\r')
    
    avg_loss = epoch_loss / len(dataloader)
    train_losses.append(avg_loss)
    epoch_time = time.time() - epoch_start
    
    print(f"{epoch+1:<8} {avg_loss:<12.4f} {'█' * 15} 100.0%  {epoch_time:>6.2f}s")

print("=" * 60)

print(f"Training extended! Current loss: {avg_loss:.4f}")

Continuing training for 30 more epochs...


RuntimeError: cudnn RNN backward can only be called in training mode

In [None]:
#generating text using the trained PyTorch model with temperature sampling
model.eval()  # Set to evaluation mode

# Create a generation-time mapping aligned with the current character list
num_to_char_gen = {n: char for n, char in enumerate(characters)}
char_to_num_gen = {char: n for n, char in enumerate(characters)}
vocab_size_gen = len(characters)

# Use a meaningful seed from the actual text (start of the book)
seed_text = text[:seq_length]
seed_sequence = [char_to_num_gen.get(char, 0) for char in seed_text if char in char_to_num_gen]

# Pad if necessary
while len(seed_sequence) < seq_length:
    seed_sequence.append(0)
seed_sequence = seed_sequence[:seq_length]

generated_text = list(seed_text[:seq_length])

print("Seed text:")
print(''.join(generated_text))
print("\n" + "="*60)
print("Generating text with temperature sampling...\n")

# Temperature for sampling (lower = more conservative, higher = more random)
temperature = 0.5

for i in range(500):
    # Prepare input
    x_input = np.array(seed_sequence).reshape(1, seq_length, 1)
    x_input = torch.FloatTensor(x_input / float(vocab_size_gen)).to(device)
    
    # Predict
    with torch.no_grad():
        pred = model(x_input)
        # Apply temperature
        pred = pred / temperature
        # Get probabilities
        probs = torch.softmax(pred, dim=1).cpu().numpy()[0]
        
        # Clip probabilities to valid vocabulary size
        probs = probs[:vocab_size_gen]
        probs = probs / probs.sum()  # Renormalize
        
        # Sample from the distribution
        pred_index = np.random.choice(len(probs), p=probs)
    
    # Ensure valid index
    if pred_index >= vocab_size_gen:
        pred_index = pred_index % vocab_size_gen
    
    # Append to generated text
    generated_text.append(num_to_char_gen[pred_index])
    
    # Update seed
    seed_sequence.append(pred_index)
    seed_sequence = seed_sequence[1:]

# Print generated text
print("Generated text:")
print(''.join(generated_text))


Generating text...

Generated text sample:
 !"'(),-.03:;?[]_`abcdefghijklmnopqrstuvwxyz !"'(),-.03:;?[]_`abcdefghijklmnopqrstuvwxyz !"'(),-.03:!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!ejcrvgt!k  !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

...
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!


In [12]:
import torch
print(torch.cuda.is_available())


True
