<a href="https://colab.research.google.com/github/vvamsi91/RTML_AS3/blob/main/RTML_AS3_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

!pip install ipython-autotime
%load_ext autotime

Collecting ipython-autotime
  Downloading ipython_autotime-0.3.2-py2.py3-none-any.whl (7.0 kB)
Collecting jedi>=0.16 (from ipython->ipython-autotime)
  Downloading jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: jedi, ipython-autotime
Successfully installed ipython-autotime-0.3.2 jedi-0.19.1
time: 377 µs (started: 2024-03-09 03:31:22 +00:00)


In [None]:

import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torchvision import datasets, transforms
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import precision_score, recall_score, accuracy_score, confusion_matrix, ConfusionMatrixDisplay, classification_report, f1_score
from tqdm.notebook import tqdm
import seaborn as sns
import torch.nn.functional as F

torch.set_printoptions(edgeitems=2)
torch.manual_seed(123)
np.random.seed(123)


time: 2.39 ms (started: 2024-03-09 03:31:45 +00:00)


In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

time: 58.4 ms (started: 2024-03-09 03:31:55 +00:00)


In [None]:


class CharLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharLSTM, self).__init__()

        # Set hyperparameters
        self.hidden_size = hidden_size

        # Define layers
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # Embed input sequence
        embedded = self.embedding(x)

        # Apply LSTM layer
        output, _ = self.lstm(embedded)

        # Extract the last time step and pass through the linear layer
        output = self.fc(output[:, -1, :])

        return output

class CharGRU(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharGRU, self).__init__()

        # Set hyperparameters
        self.hidden_size = hidden_size

        # Define layers
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # Embed input sequence
        embedded = self.embedding(x)

        # Apply GRU layer
        output, _ = self.gru(embedded)

        # Extract the last time step and pass through the linear layer
        output = self.fc(output[:, -1, :])

        return output

def predict_next_char(model, char_to_ix, ix_to_char, initial_str, max_length):
    """
    Predict the next character in a sequence using the trained model.

    Args:
        model (nn.Module): The PyTorch model.
        char_to_ix (dict): Mapping from characters to indices.
        ix_to_char (dict): Mapping from indices to characters.
        initial_str (str): The initial sequence.
        max_length (int): Maximum length of the sequence used for prediction.

    Returns:
        str: Predicted next character.
    """
    model.eval()
    with torch.no_grad():
        initial_input = torch.tensor([char_to_ix[c] for c in initial_str[-max_length:]], dtype=torch.long).unsqueeze(0).to(device)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
        return ix_to_char[predicted_index]


time: 2.2 ms (started: 2024-03-09 03:31:57 +00:00)


In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import requests

# Step 1: Download the dataset
url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
response = requests.get(url)
text = response.text  # This is the entire text data

# Step 2: Prepare the dataset
sequence_length = 20

# Create a character mapping to integers
chars = sorted(list(set(text)))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = {i: ch for i, ch in enumerate(chars)}

# Encode the text into integers
encoded_text = [char_to_int[ch] for ch in text]

# Create sequences and targets
sequences = [encoded_text[i:i + sequence_length] for i in range(0, len(encoded_text) - sequence_length)]
targets = encoded_text[sequence_length:]

# Convert lists to PyTorch tensors
sequences = torch.tensor(sequences, dtype=torch.long)
targets = torch.tensor(targets, dtype=torch.long)

# Step 3: Create a dataset class
class CharDataset(Dataset):
    def __init__(self, sequences, targets):
        self.sequences = sequences
        self.targets = targets

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, index):
        return self.sequences[index], self.targets[index]

# Instantiate the dataset
dataset = CharDataset(sequences, targets)

# Step 4: Create data loaders
batch_size = 128
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)


time: 4.99 s (started: 2024-03-09 03:32:10 +00:00)


In [None]:
def train_and_evaluate_model(model, criterion, optimizer, train_loader, test_loader, device='cuda', n_epochs=20):
    """
    Train and evaluate a PyTorch model.

    Args:
        model (nn.Module): The PyTorch model.
        criterion: Loss function.
        optimizer: Optimizer for training.
        train_loader (DataLoader): DataLoader for the training set.
        test_loader (DataLoader): DataLoader for the test set.
        device (str): Device for training and evaluation (default is 'cuda').
        n_epochs (int): Number of training epochs.

    Returns:
        dict: Dictionary containing training loss, validation loss, validation accuracy, and the trained model.
    """
    model.to(device)

    train_loss_list, val_loss_list, val_accuracy_list = [], [], []

    # Training loop
    for epoch in range(n_epochs):
        running_loss = 0.0
        model.train()
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        train_loss_list.append(running_loss / len(train_loader))

        # Validation loop
        running_loss = 0.0
        correct = 0
        total = 0
        model.eval()
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                running_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

            val_loss_list.append(running_loss / len(test_loader))
            val_accuracy = 100 * (correct / total)
            val_accuracy_list.append(val_accuracy)
            print(f'Epoch {epoch + 1}, Training loss: {train_loss_list[-1]:.4f}, Validation loss: {val_loss_list[-1]:.4f}, Validation Accuracy: {val_accuracy:.2f}%')

    return {
        'train_loss': train_loss_list,
        'val_loss': val_loss_list,
        'val_accuracy': val_accuracy_list,
        'model': model
    }


time: 1.07 ms (started: 2024-03-09 03:32:20 +00:00)


In [None]:
# Set hyperparameters
hidden_size = 128
learning_rate = 0.005
epochs = 100

# Instantiate the CharLSTM model
model = CharLSTM(input_size=len(chars), hidden_size=hidden_size, output_size=len(chars))

# Move the model to the specified device (e.g., 'cuda' or 'cpu')
model.to(device)

# Define the CrossEntropyLoss criterion
criterion = nn.CrossEntropyLoss()

# Define the Adam optimizer for training
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

time: 579 ms (started: 2024-03-09 03:32:26 +00:00)


In [None]:
lstm_results = train_and_evaluate_model(model, criterion, optimizer, train_loader, test_loader, device='cuda', n_epochs=10)


Epoch 1, Training loss: 1.7496, Validation loss: 1.6346, Validation Accuracy: 50.72%
Epoch 2, Training loss: 1.5917, Validation loss: 1.5862, Validation Accuracy: 52.09%
Epoch 3, Training loss: 1.5592, Validation loss: 1.5802, Validation Accuracy: 52.10%
Epoch 4, Training loss: 1.5470, Validation loss: 1.5604, Validation Accuracy: 52.83%
Epoch 5, Training loss: 1.5388, Validation loss: 1.5624, Validation Accuracy: 52.59%
Epoch 6, Training loss: 1.5363, Validation loss: 1.5551, Validation Accuracy: 53.10%
Epoch 7, Training loss: 1.5365, Validation loss: 1.5626, Validation Accuracy: 52.71%
Epoch 8, Training loss: 1.5390, Validation loss: 1.5642, Validation Accuracy: 52.52%
Epoch 9, Training loss: 1.5413, Validation loss: 1.5635, Validation Accuracy: 52.59%
Epoch 10, Training loss: 1.5400, Validation loss: 1.5631, Validation Accuracy: 52.66%
time: 4min 13s (started: 2024-03-09 03:32:30 +00:00)


In [None]:
def get_total_parameters(model):
    """
    Calculate the total number of parameters in a PyTorch model.

    Args:
        model (nn.Module): The PyTorch model.

    Returns:
        int: Total number of parameters.
    """
    return sum(p.numel() for p in model.parameters())

# Get total parameters for the LSTM model
total_params = get_total_parameters(lstm_results['model'])
print(f'Total number of parameters in the model: {total_params}')


Total number of parameters in the model: 148801
time: 969 µs (started: 2024-03-09 03:38:46 +00:00)


In [None]:
def predict_and_print_next_char(model, char_to_int, int_to_char, test_str, max_length):
    """
    Predict and print the next character in a sequence using the trained model.

    Args:
        model (nn.Module): The PyTorch model.
        char_to_int (dict): Mapping from characters to indices.
        int_to_char (dict): Mapping from indices to characters.
        test_str (str): The initial sequence.
        max_length (int): Maximum length of the sequence used for prediction.

    Returns:
        None
    """
    predicted_char = predict_next_char(model, char_to_int, int_to_char, test_str, max_length)
    print(f"Predicted next character: '{predicted_char}'")

# Predict and print the next character for the LSTM model
predict_and_print_next_char(lstm_results['model'], char_to_int, int_to_char, test_str, max_length=20)


Predicted next character: 'g'
time: 3.23 ms (started: 2024-03-09 03:42:31 +00:00)


In [None]:
import torch.nn as nn
import torch.optim as optim

# Set hyperparameters
hidden_size = 256
learning_rate = 0.001
epochs = 80

# Instantiate the CharGRU model
model = CharGRU(input_size=len(chars), hidden_size=hidden_size, output_size=len(chars))

# Move the model to the specified device (e.g., 'cuda' or 'cpu')
model.to(device)

# Define the CrossEntropyLoss criterion
criterion = nn.CrossEntropyLoss()

# Define the Adam optimizer for training
optimizer = optim.Adam(model.parameters(), lr=learning_rate)


time: 12.2 ms (started: 2024-03-09 03:44:53 +00:00)


In [18]:
gru_results_20 = train_and_evaluate_model(model, criterion, optimizer, train_loader, test_loader, device='cuda', n_epochs=10)


Epoch 1, Training loss: 1.6911, Validation loss: 1.5546, Validation Accuracy: 53.07%
Epoch 2, Training loss: 1.5001, Validation loss: 1.4942, Validation Accuracy: 54.42%
Epoch 3, Training loss: 1.4554, Validation loss: 1.4690, Validation Accuracy: 55.25%
Epoch 4, Training loss: 1.4301, Validation loss: 1.4575, Validation Accuracy: 55.54%
Epoch 5, Training loss: 1.4131, Validation loss: 1.4493, Validation Accuracy: 55.77%
Epoch 6, Training loss: 1.4016, Validation loss: 1.4403, Validation Accuracy: 55.97%
Epoch 7, Training loss: 1.3916, Validation loss: 1.4380, Validation Accuracy: 56.04%
Epoch 8, Training loss: 1.3850, Validation loss: 1.4312, Validation Accuracy: 56.23%
Epoch 9, Training loss: 1.3816, Validation loss: 1.4269, Validation Accuracy: 56.37%
Epoch 10, Training loss: 1.3787, Validation loss: 1.4293, Validation Accuracy: 56.43%
time: 5min 15s (started: 2024-03-09 03:45:49 +00:00)


In [19]:
total_params = sum(p.numel() for p in gru_results_20['model'].parameters())
print(f'Total number of parameters in the model: {total_params}')

Total number of parameters in the model: 428097
time: 1.79 ms (started: 2024-03-09 03:51:05 +00:00)


In [21]:
# Set sequence length
sequence_length = 30

# Create a character mapping to integers
chars = sorted(list(set(text)))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = {i: ch for i, ch in enumerate(chars)}

# Encode the text into integers
encoded_text = [char_to_int[ch] for ch in text]

# Create sequences and targets
sequences = [encoded_text[i:i + sequence_length] for i in range(0, len(encoded_text) - sequence_length)]
targets = encoded_text[sequence_length:]

# Convert lists to PyTorch tensors
sequences = torch.tensor(sequences, dtype=torch.long)
targets = torch.tensor(targets, dtype=torch.long)

# Instantiate the dataset
dataset = CharDataset(sequences, targets)

# Create data loaders
batch_size = 128
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size

# Use random_split for training and test datasets
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

# Create DataLoader instances
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

time: 5.09 s (started: 2024-03-09 03:54:52 +00:00)


In [24]:

hidden_size = 128
learning_rate = 0.001
epochs = 5

model = CharLSTM(len(chars), hidden_size, len(chars))
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

time: 7.56 ms (started: 2024-03-09 03:56:13 +00:00)


In [25]:
lstm_results_30 = train_and_evaluate_model(model, criterion, optimizer, train_loader, test_loader, device='cuda', n_epochs=10)


Epoch 1, Training loss: 1.8294, Validation loss: 1.6304, Validation Accuracy: 51.47%
Epoch 2, Training loss: 1.5760, Validation loss: 1.5420, Validation Accuracy: 53.60%
Epoch 3, Training loss: 1.5094, Validation loss: 1.5014, Validation Accuracy: 54.75%
Epoch 4, Training loss: 1.4723, Validation loss: 1.4778, Validation Accuracy: 55.15%
Epoch 5, Training loss: 1.4475, Validation loss: 1.4593, Validation Accuracy: 55.66%
Epoch 6, Training loss: 1.4295, Validation loss: 1.4495, Validation Accuracy: 55.95%
Epoch 7, Training loss: 1.4158, Validation loss: 1.4375, Validation Accuracy: 56.31%
Epoch 8, Training loss: 1.4040, Validation loss: 1.4262, Validation Accuracy: 56.50%
Epoch 9, Training loss: 1.3937, Validation loss: 1.4251, Validation Accuracy: 56.67%
Epoch 10, Training loss: 1.3853, Validation loss: 1.4212, Validation Accuracy: 56.66%
time: 4min 48s (started: 2024-03-09 03:56:38 +00:00)


In [27]:
def get_total_parameters(model):
    """
    Calculate the total number of parameters in a PyTorch model.

    Args:
        model (nn.Module): The PyTorch model.

    Returns:
        int: Total number of parameters.
    """
    return sum(p.numel() for p in model.parameters())

# Get total parameters for the LSTM model with sequence length 30
total_params = get_total_parameters(lstm_results_30['model'])
print(f'Total number of parameters in the model: {total_params}')


Total number of parameters in the model: 148801
time: 986 µs (started: 2024-03-09 04:02:02 +00:00)


In [28]:
def predict_and_print_next_char(model, char_to_int, int_to_char, test_str, max_length):
    """
    Predict and print the next character in a sequence using the trained model.

    Args:
        model (nn.Module): The PyTorch model.
        char_to_int (dict): Mapping from characters to indices.
        int_to_char (dict): Mapping from indices to characters.
        test_str (str): The initial sequence.
        max_length (int): Maximum length of the sequence used for prediction.

    Returns:
        None
    """
    predicted_char = predict_next_char(model, char_to_int, int_to_char, test_str, max_length)
    print(f"Predicted next character: '{predicted_char}'")

# Predict and print the next character for the LSTM model with sequence length 30
predict_and_print_next_char(lstm_results_30['model'], char_to_int, int_to_char, test_str, max_length=30)


Predicted next character: 'g'
time: 3.63 ms (started: 2024-03-09 04:02:06 +00:00)


In [29]:
hidden_size = 256
learning_rate = 0.001
epochs = 5

model = CharGRU(len(chars), hidden_size, len(chars))
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)


time: 10.8 ms (started: 2024-03-09 04:02:09 +00:00)


In [30]:
gru_results_30 = train_and_evaluate_model(model, criterion, optimizer, train_loader, test_loader, device='cuda', n_epochs=10)


Epoch 1, Training loss: 1.6879, Validation loss: 1.5394, Validation Accuracy: 53.49%
Epoch 2, Training loss: 1.4951, Validation loss: 1.4787, Validation Accuracy: 54.77%
Epoch 3, Training loss: 1.4479, Validation loss: 1.4545, Validation Accuracy: 55.83%
Epoch 4, Training loss: 1.4222, Validation loss: 1.4342, Validation Accuracy: 56.23%
Epoch 5, Training loss: 1.4057, Validation loss: 1.4257, Validation Accuracy: 56.64%
Epoch 6, Training loss: 1.3939, Validation loss: 1.4263, Validation Accuracy: 56.54%
Epoch 7, Training loss: 1.3851, Validation loss: 1.4193, Validation Accuracy: 56.76%
Epoch 8, Training loss: 1.3777, Validation loss: 1.4155, Validation Accuracy: 56.93%
Epoch 9, Training loss: 1.3733, Validation loss: 1.4076, Validation Accuracy: 56.94%
Epoch 10, Training loss: 1.3682, Validation loss: 1.4132, Validation Accuracy: 57.01%
time: 6min 57s (started: 2024-03-09 04:02:21 +00:00)


In [33]:
def get_total_parameters(model):
    """
    Calculate the total number of parameters in a PyTorch model.

    Args:
        model (nn.Module): The PyTorch model.

    Returns:
        int: Total number of parameters.
    """
    return sum(p.numel() for p in model.parameters())

# Get total parameters for the GRU model with sequence length 30
total_params = get_total_parameters(gru_results_30['model'])
print(f'Total number of parameters in the model: {total_params}')


Total number of parameters in the model: 428097
time: 1.31 ms (started: 2024-03-09 04:09:35 +00:00)


In [34]:
def predict_and_print_next_char(model, char_to_int, int_to_char, test_str, max_length):
    """
    Predict and print the next character in a sequence using the trained model.

    Args:
        model (nn.Module): The PyTorch model.
        char_to_int (dict): Mapping from characters to indices.
        int_to_char (dict): Mapping from indices to characters.
        test_str (str): The initial sequence.
        max_length (int): Maximum length of the sequence used for prediction.

    Returns:
        None
    """
    predicted_char = predict_next_char(model, char_to_int, int_to_char, test_str, max_length)
    print(f"Predicted next character: '{predicted_char}'")

# Predict and print the next character for the GRU model with sequence length 30
predict_and_print_next_char(gru_results_30['model'], char_to_int, int_to_char, test_str, max_length=30)


Predicted next character: 'g'
time: 3.73 ms (started: 2024-03-09 04:09:37 +00:00)


In [36]:
hidden_size = 256
learning_rate = 0.001
epochs = 10

model = CharLSTM(len(chars), hidden_size, len(chars))
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

time: 15.9 ms (started: 2024-03-09 04:10:05 +00:00)


In [37]:
lstm_results__hidden_20 = train_and_evaluate_model(model, criterion, optimizer, train_loader, test_loader, device='cuda', n_epochs=10)


Epoch 1, Training loss: 1.7001, Validation loss: 1.5269, Validation Accuracy: 53.85%
Epoch 2, Training loss: 1.4764, Validation loss: 1.4500, Validation Accuracy: 55.83%
Epoch 3, Training loss: 1.4161, Validation loss: 1.4181, Validation Accuracy: 56.55%
Epoch 4, Training loss: 1.3808, Validation loss: 1.3949, Validation Accuracy: 57.12%
Epoch 5, Training loss: 1.3561, Validation loss: 1.3813, Validation Accuracy: 57.63%
Epoch 6, Training loss: 1.3363, Validation loss: 1.3719, Validation Accuracy: 57.70%
Epoch 7, Training loss: 1.3213, Validation loss: 1.3640, Validation Accuracy: 58.20%
Epoch 8, Training loss: 1.3084, Validation loss: 1.3593, Validation Accuracy: 58.00%
Epoch 9, Training loss: 1.2965, Validation loss: 1.3545, Validation Accuracy: 58.22%
Epoch 10, Training loss: 1.2866, Validation loss: 1.3498, Validation Accuracy: 58.44%
time: 7min 37s (started: 2024-03-09 04:10:08 +00:00)


In [43]:
def get_total_parameters(model):
    """
    Calculate the total number of parameters in a PyTorch model.

    Args:
        model (nn.Module): The PyTorch model.

    Returns:
        int: Total number of parameters.
    """
    return sum(p.numel() for p in model.parameters())

# Get total parameters for the LSTM model with hidden size 20
total_params = get_total_parameters(lstm_results__hidden_20['model'])
print(f'Total number of parameters in the model: {total_params}')



Total number of parameters in the model: 559681
time: 970 µs (started: 2024-03-09 04:21:55 +00:00)


In [45]:
def predict_and_print_next_char(model, char_to_int, int_to_char, test_str, max_length):
    """
    Predict and print the next character in a sequence using the trained model.

    Args:
        model (nn.Module): The PyTorch model.
        char_to_int (dict): Mapping from characters to indices.
        int_to_char (dict): Mapping from indices to characters.
        test_str (str): The initial sequence.
        max_length (int): Maximum length of the sequence used for prediction.

    Returns:
        None
    """
    predicted_char = predict_next_char(model, char_to_int, int_to_char, test_str, max_length)
    print(f"Predicted next character: '{predicted_char}'")

# Predict and print the next character for the LSTM model with hidden size 20 and sequence length 30
predict_and_print_next_char(lstm_results__hidden_20['model'], char_to_int, int_to_char, test_str, max_length=30)


Predicted next character: 'g'
time: 27.2 ms (started: 2024-03-09 04:22:14 +00:00)


In [46]:

hidden_size = 256
learning_rate = 0.001
epochs = 10

model = CharGRU(len(chars), hidden_size, len(chars))
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

time: 9.36 ms (started: 2024-03-09 04:22:17 +00:00)


In [47]:
gru_results__hidden_20 = train_and_evaluate_model(model, criterion, optimizer, train_loader, test_loader, device='cuda', n_epochs=10)


Epoch 1, Training loss: 1.6885, Validation loss: 1.5468, Validation Accuracy: 53.36%
Epoch 2, Training loss: 1.4936, Validation loss: 1.4820, Validation Accuracy: 54.93%
Epoch 3, Training loss: 1.4460, Validation loss: 1.4532, Validation Accuracy: 55.77%
Epoch 4, Training loss: 1.4220, Validation loss: 1.4418, Validation Accuracy: 55.84%
Epoch 5, Training loss: 1.4050, Validation loss: 1.4285, Validation Accuracy: 56.45%
Epoch 6, Training loss: 1.3918, Validation loss: 1.4198, Validation Accuracy: 56.62%
Epoch 7, Training loss: 1.3833, Validation loss: 1.4166, Validation Accuracy: 57.05%
Epoch 8, Training loss: 1.3766, Validation loss: 1.4094, Validation Accuracy: 57.16%
Epoch 9, Training loss: 1.3717, Validation loss: 1.4137, Validation Accuracy: 56.91%
Epoch 10, Training loss: 1.3690, Validation loss: 1.4117, Validation Accuracy: 56.87%
time: 6min 50s (started: 2024-03-09 04:22:19 +00:00)


In [49]:
def predict_and_print_next_char(model, char_to_int, int_to_char, test_str, max_length):
    """
    Predict and print the next character in a sequence using the trained model.

    Args:
        model (nn.Module): The PyTorch model.
        char_to_int (dict): Mapping from characters to indices.
        int_to_char (dict): Mapping from indices to characters.
        test_str (str): The initial sequence.
        max_length (int): Maximum length of the sequence used for prediction.

    Returns:
        None
    """
    predicted_char = predict_next_char(model, char_to_int, int_to_char, test_str, max_length)
    print(f"Predicted next character: '{predicted_char}'")

# Predict and print the next character for the GRU model with hidden size 20 and sequence length 30
predict_and_print_next_char(gru_results__hidden_20['model'], char_to_int, int_to_char, test_str, max_length=30)


Predicted next character: 'g'
time: 4.51 ms (started: 2024-03-09 04:33:06 +00:00)


In [51]:
def predict_and_print_next_char(model, char_to_int, int_to_char, test_str, max_length):
    """
    Predict and print the next character in a sequence using the trained model.

    Args:
        model (nn.Module): The PyTorch model.
        char_to_int (dict): Mapping from characters to indices.
        int_to_char (dict): Mapping from indices to characters.
        test_str (str): The initial sequence.
        max_length (int): Maximum length of the sequence used for prediction.

    Returns:
        None
    """
    predicted_char = predict_next_char(model, char_to_int, int_to_char, test_str, max_length)
    print(f"Predicted next character: '{predicted_char}'")

# Predict and print the next character for the GRU model with hidden size 20 and sequence length 30
predict_and_print_next_char(gru_results__hidden_20['model'], char_to_int, int_to_char, test_str, max_length=30)


Predicted next character: 'g'
time: 3.37 ms (started: 2024-03-09 04:33:29 +00:00)


In [52]:
# Define the character mapping to integers
char_to_int = {ch: i for i, ch in enumerate(sorted(set(text)))}
int_to_char = {i: ch for i, ch in enumerate(sorted(set(text)))}

# Encode the text into integers
encoded_text = [char_to_int[ch] for ch in text]

# Create sequences and targets
sequences = [encoded_text[i:i + sequence_length] for i in range(len(encoded_text) - sequence_length)]
targets = encoded_text[sequence_length:]

# Convert lists to PyTorch tensors
sequences = torch.tensor(sequences, dtype=torch.long)
targets = torch.tensor(targets, dtype=torch.long)

# Instantiate the dataset
dataset = CharDataset(sequences, targets)

# Create data loaders
batch_size = 128
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)


time: 5.5 s (started: 2024-03-09 04:33:33 +00:00)


In [54]:
hidden_size = 256
learning_rate = 0.001
epochs = 10

model = CharLSTM(len(chars), hidden_size, len(chars))
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

time: 11.2 ms (started: 2024-03-09 04:34:12 +00:00)


In [55]:
lstm_results__hidden_30 = train_and_evaluate_model(model, criterion, optimizer, train_loader, test_loader, device='cuda', n_epochs=10)


Epoch 1, Training loss: 1.6992, Validation loss: 1.5287, Validation Accuracy: 53.86%
Epoch 2, Training loss: 1.4756, Validation loss: 1.4589, Validation Accuracy: 55.45%
Epoch 3, Training loss: 1.4139, Validation loss: 1.4218, Validation Accuracy: 56.47%
Epoch 4, Training loss: 1.3778, Validation loss: 1.4024, Validation Accuracy: 57.05%
Epoch 5, Training loss: 1.3521, Validation loss: 1.3862, Validation Accuracy: 57.38%
Epoch 6, Training loss: 1.3324, Validation loss: 1.3759, Validation Accuracy: 57.74%
Epoch 7, Training loss: 1.3165, Validation loss: 1.3724, Validation Accuracy: 57.75%
Epoch 8, Training loss: 1.3037, Validation loss: 1.3685, Validation Accuracy: 57.95%
Epoch 9, Training loss: 1.2932, Validation loss: 1.3591, Validation Accuracy: 58.25%
Epoch 10, Training loss: 1.2833, Validation loss: 1.3538, Validation Accuracy: 58.46%
time: 7min 35s (started: 2024-03-09 04:34:30 +00:00)


In [58]:
def calculate_total_parameters(model):
    """
    Calculate the total number of parameters in a PyTorch model.

    Args:
        model (nn.Module): The PyTorch model.

    Returns:
        int: Total number of parameters.
    """
    return sum(p.numel() for p in model.parameters())

# Get total parameters for the LSTM model with hidden size 30
total_params = calculate_total_parameters(lstm_results__hidden_30['model'])
print(f'Total number of parameters in the model: {total_params}')


Total number of parameters in the model: 559681
time: 2.28 ms (started: 2024-03-09 04:42:50 +00:00)


In [59]:
def predict_and_print_next_char(model, char_to_int, int_to_char, test_str, max_length):
    """
    Predict and print the next character in a sequence using the trained model.

    Args:
        model (nn.Module): The PyTorch model.
        char_to_int (dict): Mapping from characters to indices.
        int_to_char (dict): Mapping from indices to characters.
        test_str (str): The initial sequence.
        max_length (int): Maximum length of the sequence used for prediction.

    Returns:
        None
    """
    predicted_char = predict_next_char(model, char_to_int, int_to_char, test_str, max_length)
    print(f"Predicted next character: '{predicted_char}'")

# Predict and print the next character for the LSTM model with hidden size 30 and sequence length 30
predict_and_print_next_char(lstm_results__hidden_30['model'], char_to_int, int_to_char, test_str, max_length=30)


Predicted next character: 'g'
time: 4.39 ms (started: 2024-03-09 04:43:05 +00:00)


In [60]:
hidden_size = 256
learning_rate = 0.001
epochs = 10

model = CharGRU(len(chars), hidden_size, len(chars))
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

time: 9.98 ms (started: 2024-03-09 04:43:49 +00:00)


In [61]:
gru_results_hidden_30 = train_and_evaluate_model(model, criterion, optimizer, train_loader, test_loader, device='cuda', n_epochs=10)


Epoch 1, Training loss: 1.6907, Validation loss: 1.5413, Validation Accuracy: 53.36%
Epoch 2, Training loss: 1.4937, Validation loss: 1.4820, Validation Accuracy: 54.83%
Epoch 3, Training loss: 1.4465, Validation loss: 1.4581, Validation Accuracy: 55.58%
Epoch 4, Training loss: 1.4209, Validation loss: 1.4466, Validation Accuracy: 56.13%
Epoch 5, Training loss: 1.4029, Validation loss: 1.4328, Validation Accuracy: 56.29%
Epoch 6, Training loss: 1.3913, Validation loss: 1.4292, Validation Accuracy: 56.47%
Epoch 7, Training loss: 1.3837, Validation loss: 1.4245, Validation Accuracy: 56.63%
Epoch 8, Training loss: 1.3758, Validation loss: 1.4232, Validation Accuracy: 56.42%
Epoch 9, Training loss: 1.3707, Validation loss: 1.4211, Validation Accuracy: 56.57%
Epoch 10, Training loss: 1.3690, Validation loss: 1.4189, Validation Accuracy: 56.97%
time: 6min 56s (started: 2024-03-09 04:43:59 +00:00)


In [62]:
def calculate_total_parameters(model):
    """
    Calculate the total number of parameters in a PyTorch model.

    Args:
        model (nn.Module): The PyTorch model.

    Returns:
        int: Total number of parameters.
    """
    return sum(p.numel() for p in model.parameters())

# Get total parameters for the GRU model with hidden size 30
total_params = calculate_total_parameters(gru_results_hidden_30['model'])
print(f'Total number of parameters in the model: {total_params}')


Total number of parameters in the model: 428097
time: 2.46 ms (started: 2024-03-09 04:51:37 +00:00)


In [63]:
def predict_and_print_next_char(model, char_to_int, int_to_char, test_str, max_length):
    """
    Predict and print the next character in a sequence using the trained model.

    Args:
        model (nn.Module): The PyTorch model.
        char_to_int (dict): Mapping from characters to indices.
        int_to_char (dict): Mapping from indices to characters.
        test_str (str): The initial sequence.
        max_length (int): Maximum length of the sequence used for prediction.

    Returns:
        None
    """
    predicted_char = predict_next_char(model, char_to_int, int_to_char, test_str, max_length)
    print(f"Predicted next character: '{predicted_char}'")

# Predict and print the next character for the GRU model with hidden size 30 and sequence length 30
predict_and_print_next_char(gru_results_hidden_30['model'], char_to_int, int_to_char, test_str, max_length=30)


Predicted next character: 'g'
time: 4.21 ms (started: 2024-03-09 04:51:42 +00:00)


In [64]:
# Function to create a character mapping to integers
def create_char_mappings(text):
    chars = sorted(list(set(text)))
    char_to_int = {ch: i for i, ch in enumerate(chars)}
    int_to_char = {i: ch for i, ch in enumerate(chars)}
    return char_to_int, int_to_char

# Function to encode text into integers and create sequences with targets
def encode_and_create_sequences(text, sequence_length, char_to_int):
    encoded_text = [char_to_int[ch] for ch in text]

    sequences = []
    targets = []
    for i in range(0, len(encoded_text) - sequence_length):
        seq = encoded_text[i:i+sequence_length]
        target = encoded_text[i+sequence_length]
        sequences.append(seq)
        targets.append(target)

    sequences = torch.tensor(sequences, dtype=torch.long)
    targets = torch.tensor(targets, dtype=torch.long)
    return sequences, targets

# Create character mappings
char_to_int, int_to_char = create_char_mappings(text)

# Encode text into integers and create sequences with targets
sequences, targets = encode_and_create_sequences(text, sequence_length, char_to_int)

# Instantiate the dataset
dataset = CharDataset(sequences, targets)

# Create data loaders
batch_size = 128
train_size = int(len(dataset) * 0.8)
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)


time: 7.11 s (started: 2024-03-09 04:52:42 +00:00)


In [66]:

hidden_size = 256
learning_rate = 0.001
epochs = 10

model = CharLSTM(len(chars), hidden_size, len(chars))
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

time: 11.5 ms (started: 2024-03-09 04:53:12 +00:00)


In [67]:
lstm_results_hidden_50 = train_and_evaluate_model(model, criterion, optimizer, train_loader, test_loader, device='cuda', n_epochs=10)


Epoch 1, Training loss: 1.6984, Validation loss: 1.5297, Validation Accuracy: 53.61%
Epoch 2, Training loss: 1.4732, Validation loss: 1.4506, Validation Accuracy: 55.56%
Epoch 3, Training loss: 1.4118, Validation loss: 1.4172, Validation Accuracy: 56.72%
Epoch 4, Training loss: 1.3776, Validation loss: 1.3917, Validation Accuracy: 57.41%
Epoch 5, Training loss: 1.3520, Validation loss: 1.3810, Validation Accuracy: 57.52%
Epoch 6, Training loss: 1.3322, Validation loss: 1.3740, Validation Accuracy: 57.82%
Epoch 7, Training loss: 1.3168, Validation loss: 1.3641, Validation Accuracy: 58.09%
Epoch 8, Training loss: 1.3043, Validation loss: 1.3614, Validation Accuracy: 58.25%
Epoch 9, Training loss: 1.2927, Validation loss: 1.3619, Validation Accuracy: 58.19%
Epoch 10, Training loss: 1.2838, Validation loss: 1.3530, Validation Accuracy: 58.66%
time: 7min 35s (started: 2024-03-09 04:53:31 +00:00)


In [68]:
def calculate_total_parameters(model):
    """
    Calculate the total number of parameters in a PyTorch model.

    Args:
        model (nn.Module): The PyTorch model.

    Returns:
        int: Total number of parameters.
    """
    return sum(p.numel() for p in model.parameters())

# Get total parameters for the LSTM model with hidden size 50
total_params = calculate_total_parameters(lstm_results_hidden_50['model'])
print(f'Total number of parameters in the model: {total_params}')


Total number of parameters in the model: 559681
time: 2.06 ms (started: 2024-03-09 05:02:23 +00:00)


In [69]:
hidden_size = 256
learning_rate = 0.001
epochs = 10

model = CharGRU(len(chars), hidden_size, len(chars))
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

time: 10.4 ms (started: 2024-03-09 05:04:14 +00:00)


In [70]:
gru_results_hidden_50 = train_and_evaluate_model(model, criterion, optimizer, train_loader, test_loader, device='cuda', n_epochs=10)


Epoch 1, Training loss: 1.6843, Validation loss: 1.5365, Validation Accuracy: 53.77%
Epoch 2, Training loss: 1.4914, Validation loss: 1.4855, Validation Accuracy: 54.88%
Epoch 3, Training loss: 1.4452, Validation loss: 1.4534, Validation Accuracy: 55.83%
Epoch 4, Training loss: 1.4199, Validation loss: 1.4355, Validation Accuracy: 56.29%
Epoch 5, Training loss: 1.4026, Validation loss: 1.4350, Validation Accuracy: 56.29%
Epoch 6, Training loss: 1.3907, Validation loss: 1.4254, Validation Accuracy: 56.41%
Epoch 7, Training loss: 1.3833, Validation loss: 1.4276, Validation Accuracy: 56.21%
Epoch 8, Training loss: 1.3763, Validation loss: 1.4181, Validation Accuracy: 56.77%
Epoch 9, Training loss: 1.3723, Validation loss: 1.4145, Validation Accuracy: 56.74%
Epoch 10, Training loss: 1.3672, Validation loss: 1.4126, Validation Accuracy: 56.87%
time: 6min 50s (started: 2024-03-09 05:04:25 +00:00)


In [72]:
def calculate_total_parameters(model):
    """
    Calculate the total number of parameters in a PyTorch model.

    Args:
        model (nn.Module): The PyTorch model.

    Returns:
        int: Total number of parameters.
    """
    return sum(p.numel() for p in model.parameters())

# Get total parameters for the GRU model with hidden size 50
total_params = calculate_total_parameters(gru_results_hidden_50['model'])
print(f'Total number of parameters in the model: {total_params}')


Total number of parameters in the model: 428097
time: 2.44 ms (started: 2024-03-09 05:12:03 +00:00)


In [73]:
def predict_next_character(model, char_to_ix, ix_to_char, initial_str, max_length):
    """
    Predict the next character using a trained PyTorch model.

    Args:
        model (nn.Module): The PyTorch model.
        char_to_ix (dict): Mapping from characters to integers.
        ix_to_char (dict): Mapping from integers to characters.
        initial_str (str): Initial string for prediction.
        max_length (int): Maximum length of the sequence.

    Returns:
        str: Predicted next character.
    """
    model.eval()
    with torch.no_grad():
        initial_input = torch.tensor([char_to_ix[c] for c in initial_str[-max_length:]], dtype=torch.long).unsqueeze(0).to(device)
        prediction = model(initial_input)
        predicted_index = torch.argmax(prediction, dim=1).item()
        return ix_to_char[predicted_index]

# Test the prediction for the GRU model with hidden size 50
predicted_char = predict_next_character(gru_results_hidden_50['model'], char_to_int, int_to_char, test_str, 30)
print(f"Predicted next character: '{predicted_char}'")


Predicted next character: 'g'
time: 4.56 ms (started: 2024-03-09 05:12:06 +00:00)
