In [24]:
import torch

import torch.nn as nn

In [25]:
# Set seed for reproducibility

torch.manual_seed(42)

<torch._C.Generator at 0x111d077f0>

In [26]:
# Define the character set

chars = list(set('hello'))

char2idx = {ch: idx for idx, ch in enumerate(chars)}

idx2char = {idx: ch for ch, idx in char2idx.items()}

In [27]:
# Input and target

input_str = 'hell'

target_str = 'ello'

In [28]:
# Convert characters to indices

input_seq = torch.tensor([char2idx[ch] for ch in input_str])


target_seq = torch.tensor([char2idx[ch] for ch in target_str])


print("Input indices:", input_seq)

print("Target indices:", target_seq)

Input indices: tensor([0, 3, 2, 2])
Target indices: tensor([3, 2, 2, 1])


In [29]:
# One-hot encoding function

def one_hot_encode(seq, vocab_size):

    one_hot = torch.zeros(len(seq), vocab_size)

    for i , idx in enumerate(seq):

        one_hot[i][idx] = 1.0

    return one_hot

vocab_size = len(chars) # number of unique characters

# One-hot encode input

input_one_hot = one_hot_encode(input_seq, vocab_size)

# Reshape for RNN input: (sequence_len, batch_size, input_size)

input_one_hot = input_one_hot.unsqueeze(1)


print("One-hot input shape:", input_one_hot.shape)  # [seq_len, batch, input_size]
print("One-hot encoded input:\n", input_one_hot)


One-hot input shape: torch.Size([4, 1, 4])
One-hot encoded input:
 tensor([[[1., 0., 0., 0.]],

        [[0., 0., 0., 1.]],

        [[0., 0., 1., 0.]],

        [[0., 0., 1., 0.]]])


In [30]:
class VanillaRNNCell(nn.Module):

    def __init__(self,input_size,hidden_size, output_size):
        super(VanillaRNNCell,self).__init__()

        self.hidden_size = hidden_size

        # Weight matrices for input and hidden state

        self.i2h = nn.Linear(input_size + hidden_size , hidden_size)

        self.h2o = nn.Linear(hidden_size, output_size)

        self.softmax = nn.LogSoftmax(dim=1)

    
    def forward(self, input_t, hidden):

        combined = torch.cat((input_t, hidden),1)

        hidden = torch.tanh(self.i2h(combined))

        output = self.h2o(hidden)

        output = self.softmax(output)

        return output, hidden
    
    def init_hidden(self):

        return torch.zeros(1, self.hidden_size)


In [31]:
n_hidden = 8

rnn = VanillaRNNCell(input_size=vocab_size, hidden_size=n_hidden, output_size=vocab_size)

# Initialize hidden state

hidden = rnn.init_hidden()

# Forward through the sequence

for i in range(input_one_hot.size(0)):

    output, hidden = rnn(input_one_hot[i], hidden)

    print(f'Step{i} output : {output}')


Step0 output : tensor([[-1.4189, -1.6050, -1.1916, -1.3728]], grad_fn=<LogSoftmaxBackward0>)
Step1 output : tensor([[-1.6854, -1.5395, -1.0498, -1.3857]], grad_fn=<LogSoftmaxBackward0>)
Step2 output : tensor([[-1.5752, -1.6865, -1.0236, -1.3920]], grad_fn=<LogSoftmaxBackward0>)
Step3 output : tensor([[-1.5379, -1.6941, -1.0208, -1.4226]], grad_fn=<LogSoftmaxBackward0>)


In [32]:
# Recreate the vocabulary mapping

char_to_idx = {'h': 0, 'e': 1, 'l': 2, 'o': 3}

idx_to_char = {v: k for k, v in char_to_idx.items()}

In [33]:
predicted_indices = []

for i in range(input_one_hot.size(0)):

    output, hidden = rnn(input_one_hot[i], hidden)

    predicted_idx = torch.argmax(output, dim=1)

    predicted_indices.append(predicted_idx.item())

print('Predicted Indices:', predicted_indices)

# Mapping back to characters

idx_to_char = {v: k for k, v in char_to_idx.items()}

predicted_chars = [idx_to_char[i] for i in predicted_indices]

print("Predicted characters:", predicted_chars)




Predicted Indices: [2, 2, 2, 2]
Predicted characters: ['l', 'l', 'l', 'l']


In [34]:
# Define target_indices manually (based on your earlier tensor)

target_indices = torch.tensor([1, 0, 0, 3])

In [35]:
# Convert target indices (tensors) to characters

target_chars = [idx_to_char[i.item()] for i in target_indices] # target_indices is still a tensor

# predicted_indices is a list of ints, so use them directly

predicted_chars = [idx_to_char[i] for i in predicted_indices]

# Print the comparison

for t, p in zip(target_chars, predicted_chars):

    print(f'Target:{t} → Predicted: {p}')

Target:e → Predicted: l
Target:h → Predicted: l
Target:h → Predicted: l
Target:o → Predicted: l


🧠 Interpretation

 1. Overfitting to a Single Class
The model is likely predicting the most frequent class or falling into a pattern because:
The input sequence is very short.
There's no training (you're just passing it through once, not optimizing loss).
You're using fixed weights initialized randomly — not trained.

 2. This is an Inference Only Example
You're doing manual inference without training — just forward passes with fixed weights, so:
The model has no learned understanding of sequence dependencies yet.
Predicting the same token ('C') is common in untrained or underfitted RNNs.