<a href="https://colab.research.google.com/github/tsilva/aiml-notebooks/blob/main/wip-rnn-bit-parity-classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# RNN - Bit-parity classifier

In this notebook we'll try to build an RNN that learns to classify bit sequences by their parity. It should output `1` if the sequence contains an odd number of `1`s and `0` if the number of `1`s is even.

In [114]:
def setup_config():
    """Collect the notebook's hyperparameters into a single dict.

    The `#@markdown` / `# @param` comments are Colab-style form annotations,
    so every tunable value is kept on its own `name = value  # @param` line.

    Returns:
        dict: with keys 'seed', 'n_epochs', 'batch_size', 'learning_rate',
        'sequence_length', 'input_size', 'hidden_size', 'output_size',
        'nonlinearity', 'weight_init' and 'max_grad_norm'.
    """
    #@markdown Random seed for reproducibility
    seed = 42  # @param {type:"integer"}

    #@markdown Number of training epochs
    n_epochs = 100 # @param {type:"integer"}

    #@markdown Batch size
    batch_size = 10  # @param {type:"integer"}

    #@markdown Learning rate for the optimizer
    learning_rate = 0.01  # @param {type:"number"}

    #@markdown Length of the input sequence (number of time steps)
    sequence_length = 1000  # @param {type:"integer"}

    #@markdown Number of hidden units in the RNN
    hidden_size = 16  # @param {type:"integer"}

    #@markdown Activation function to use in the RNN ('tanh', 'sigmoid' or 'relu')
    nonlinearity = 'tanh'  # @param ['tanh', 'sigmoid', 'relu']

    #@markdown Weight initialization strategy ('none', 'xavier', or 'kaiming')
    weight_init = 'none'  # @param ['none', 'xavier', 'kaiming']

    #@markdown Maximum gradient norm for clipping (0.0 means no clipping)
    max_grad_norm = 0.0  # @param {type:"number"}

    # These are meant to be hardcoded in this notebook
    # NOTE(review): this reassignment overrides the `batch_size` form value
    # set above, so the returned config always carries batch_size == 1.
    batch_size = 1
    input_size = 1
    output_size = 2

    return {
        'seed': seed,
        'n_epochs': n_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'sequence_length': sequence_length,
        'input_size': input_size,
        'hidden_size': hidden_size,
        'output_size': output_size,
        'nonlinearity': nonlinearity,
        'weight_init': weight_init,
        'max_grad_norm': max_grad_norm
    }

# Module-level configuration read by the functions defined in the cells below.
CONFIG = setup_config()

Set manual seed for reproducibility:

In [115]:
import torch
import torch.nn as nn
import random

def set_seed(seed):
    """Seed both random number generators this notebook draws from.

    Args:
        seed: value handed to `random.seed` and `torch.manual_seed`.
    """
    for seed_fn in (random.seed, torch.manual_seed):
        seed_fn(seed)

# Seed once, before the cells below draw random bit sequences and initial weights.
set_seed(CONFIG['seed'])

Generate dataset:

In [116]:
def generate_data(n_samples, sequence_length):
    """Sample random bit sequences and label each one with its parity.

    Bits are drawn one at a time with `random.randint`, so the result is
    controlled by the seed of Python's `random` module.

    Args:
        n_samples: number of sequences to generate.
        sequence_length: number of bits in each sequence.

    Returns:
        tuple: `(data, labels)` where `data` is a float32 tensor of shape
        `(n_samples, sequence_length, 1)` and `labels` is an int64 tensor of
        shape `(n_samples,)` holding 1 for an odd number of ones, else 0.
    """
    data = []
    labels = []
    for _ in range(n_samples):
        seq = [random.randint(0, 1) for _ in range(sequence_length)]
        data.append(seq)
        labels.append(sum(seq) % 2)

    inputs = torch.tensor(data, dtype=torch.float32).unsqueeze(-1)
    if not data:
        # torch.tensor([]) is 1-D, so an empty batch would come out as (0, 1);
        # restore the (batch, time, feature) layout explicitly.
        inputs = inputs.reshape(0, max(sequence_length, 0), 1)

    # Explicit dtype: an empty label list would otherwise be inferred as float32.
    targets = torch.tensor(labels, dtype=torch.long)
    return inputs, targets

sequence_length = CONFIG["sequence_length"]
# Sample 100 labelled sequences up front.
# NOTE(review): train() and eval() call generate_data() themselves and do not
# read `data` / `labels`, so these globals look unused in the visible cells.
data, labels = generate_data(100, sequence_length)

Build the model:

In [117]:
def build_model():
    """Create the trainable tensors of the hand-written RNN.

    Layer sizes, the nonlinearity and the init strategy are read from the
    module-level CONFIG. Weights are stored in `(in_features, out_features)`
    layout because the forward pass multiplies as `x @ W`.

    Returns:
        list: `[i2h_weights, i2h_bias, h2o_weights, h2o_bias]`, each an
        `nn.Parameter`. `i2h_weights` has shape
        `(input_size + hidden_size, hidden_size)` and `h2o_weights` has shape
        `(hidden_size, output_size)`.
    """
    input_size = CONFIG["input_size"]
    hidden_size = CONFIG["hidden_size"]
    output_size = CONFIG["output_size"]

    # Parameters (default init: small Gaussian weights, zero biases)
    i2h_weights = nn.Parameter(torch.randn(input_size + hidden_size, hidden_size) * 0.1)
    i2h_bias = nn.Parameter(torch.zeros(hidden_size))
    h2o_weights = nn.Parameter(torch.randn(hidden_size, output_size) * 0.1)
    h2o_bias = nn.Parameter(torch.zeros(output_size))

    # Init
    nonlinearity = CONFIG['nonlinearity']
    if CONFIG['weight_init'] == 'xavier':
        # Xavier depends on fan_in + fan_out, so the transposed layout is harmless here.
        nn.init.xavier_uniform_(i2h_weights, gain=nn.init.calculate_gain(nonlinearity))
        nn.init.xavier_uniform_(h2o_weights, gain=nn.init.calculate_gain(nonlinearity))
    elif CONFIG['weight_init'] == 'kaiming':
        # torch.nn.init assumes nn.Linear's (out, in) layout: it takes dim 1 as
        # fan_in and dim 0 as fan_out. These weights are (in, out), so the real
        # number of inputs is dim 0, which torch calls 'fan_out'. Using
        # mode='fan_in' would scale by the output count instead.
        nn.init.kaiming_uniform_(i2h_weights, mode='fan_out', nonlinearity=nonlinearity)
        nn.init.kaiming_uniform_(h2o_weights, mode='fan_out', nonlinearity=nonlinearity)

    return [i2h_weights, i2h_bias, h2o_weights, h2o_bias]

# Keep the full list (train() hands it to the optimizer and to gradient
# clipping) and also bind each tensor to its own global name, which forward()
# reads directly.
model_params = build_model()
i2h_weights, i2h_bias, h2o_weights, h2o_bias = model_params

Define the activation function and the forward pass:

In [118]:
# Convenience copies of CONFIG entries.
# NOTE(review): forward() and train() read CONFIG directly, so nothing in the
# visible cells uses these three module-level names.
n_epochs = CONFIG['n_epochs']
hidden_size = CONFIG['hidden_size']
learning_rate = CONFIG['learning_rate']

# Activation function
def activation(x):
    """Apply the nonlinearity selected by CONFIG['nonlinearity'] to `x`.

    Args:
        x: tensor to transform element-wise.

    Returns:
        The result of `torch.relu`, `torch.sigmoid` or `torch.tanh` on `x`.

    Raises:
        ValueError: if the configured name is not 'relu', 'sigmoid' or 'tanh'.
    """
    dispatch = {
        'relu': torch.relu,
        'sigmoid': torch.sigmoid,
        'tanh': torch.tanh,
    }
    chosen = dispatch.get(CONFIG['nonlinearity'])
    if chosen is None:
        raise ValueError(f"Unknown activation function: {CONFIG['nonlinearity']}")
    return chosen(x)

# Forward pass
def forward(input_seq):
    """Run the recurrent cell over a batch of sequences and return the logits.

    The hidden state starts at zero. At every time step the current input is
    concatenated with the hidden state, projected with `i2h_weights` /
    `i2h_bias` and passed through `activation`. Only the final hidden state is
    projected to the output with `h2o_weights` / `h2o_bias`.

    Args:
        input_seq: tensor indexed as (batch, time, feature).

    Returns:
        Tensor with one row of output scores per batch element.
    """
    n_rows = input_seq.size(0)
    state = torch.zeros(n_rows, CONFIG['hidden_size'])
    # unbind(dim=1) yields the same per-step slices as input_seq[:, t]
    for step_input in input_seq.unbind(dim=1):
        joined = torch.cat((step_input, state), dim=1)
        state = activation(joined @ i2h_weights + i2h_bias)
    return state @ h2o_weights + h2o_bias

Train:

In [112]:
def train():
    """Fit the RNN parameters with Adam on freshly sampled parity batches.

    Each epoch draws a new batch with `generate_data`, computes the
    cross-entropy loss of `forward` on it and takes one optimizer step. On the
    first epoch and on every 100th epoch it prints the loss and the accuracy
    measured on that same training batch. All hyperparameters are read from
    CONFIG.
    """
    loss_fn = nn.CrossEntropyLoss()
    adam = torch.optim.Adam(model_params, lr=CONFIG['learning_rate'])
    n_samples, n_steps = CONFIG['batch_size'], CONFIG['sequence_length']

    for epoch in range(CONFIG['n_epochs']):
        batch_x, batch_y = generate_data(n_samples, n_steps)
        outputs = forward(batch_x)
        loss = loss_fn(outputs, batch_y)

        adam.zero_grad()
        loss.backward()

        clip_at = CONFIG['max_grad_norm']
        if clip_at:  # 0.0 is falsy, which disables clipping
            nn.utils.clip_grad_norm_(model_params, clip_at)

        adam.step()

        is_report_epoch = epoch == 0 or (epoch + 1) % 100 == 0
        if not is_report_epoch:
            continue

        # Accuracy of the pre-step outputs on the batch just trained on.
        accuracy = (outputs.argmax(dim=1) == batch_y).float().mean()
        print(f'Epoch [{epoch + 1}/{CONFIG["n_epochs"]}], Loss: {loss.item():.4f}, Accuracy: {accuracy.item():.4f}')

# Run the training loop; train() prints its own progress lines.
train()

Epoch [1/1000], Loss: 0.6951, Accuracy: 0.0000
Epoch [100/1000], Loss: 0.6139, Accuracy: 1.0000
Epoch [200/1000], Loss: 0.5563, Accuracy: 1.0000
Epoch [300/1000], Loss: 0.3651, Accuracy: 1.0000
Epoch [400/1000], Loss: 1.1419, Accuracy: 0.0000
Epoch [500/1000], Loss: 0.4944, Accuracy: 1.0000
Epoch [600/1000], Loss: 0.2360, Accuracy: 1.0000
Epoch [700/1000], Loss: 0.5357, Accuracy: 1.0000
Epoch [800/1000], Loss: 1.3042, Accuracy: 0.0000
Epoch [900/1000], Loss: 0.8506, Accuracy: 0.0000
Epoch [1000/1000], Loss: 0.9306, Accuracy: 0.0000


Evaluate:

In [113]:
def eval(n_samples=10):
    """Print predictions and accuracy on freshly generated test sequences.

    NOTE: this name shadows Python's builtin `eval` for the rest of the
    notebook; it is kept so that existing `eval()` calls continue to work.

    Args:
        n_samples: number of test sequences to generate. Defaults to 10.
    """
    test_data, test_labels = generate_data(n_samples, CONFIG['sequence_length'])
    # Inference only, so no autograd graph needs to be recorded.
    with torch.no_grad():
        outputs = forward(test_data)
        predicted = torch.argmax(outputs, dim=1)
        accuracy = (predicted == test_labels).float().mean()

        print("\nTest Results:")
        # One line per sample; each line prints the complete bit sequence.
        for seq, pred, label in zip(test_data.squeeze(-1), predicted, test_labels):
            print(f"Sequence: {seq.tolist()}, Predicted: {pred.item()}, Actual: {label.item()}")

        print(f"\nFinal Test Accuracy: {accuracy.item():.4f}")

# Report predictions and accuracy on newly generated test sequences.
eval()


Test Results:
Sequence: [0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0,