In [1]:
import random
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader


In [2]:
# Utils
def make_random_bin(length):
		return "".join([str(random.randint(0, 1)) for i in range(length)])

def add_bin(a: str, b: str):
		ans = bin(int(a[::-1], 2) + int(b[::-1], 2))[2:]
		# Pad with zeros
		ans = "0" * (len(a) - len(ans)) + ans
		return ans[::-1]


Create the dataset

In [3]:

class BinaryDataset(Dataset):
	def __init__(self, seq_length, num_bits):
		self.seq_length = seq_length
		self.num_bits = num_bits
		self.seq1 = make_random_bin(seq_length)
		self.seq2 = make_random_bin(seq_length)

	def __getitem__(self, index):
		sample1 = self.seq1[index:index + self.num_bits - 1] + "0"
		sample2 = self.seq2[index:index + self.num_bits - 1] + "0"
		input = torch.tensor([[int(x, 2) for x in sample1], [int(x, 2) for x in sample2]])
		input = torch.transpose(input, 0, 1).float()
		output = torch.tensor([int(x, 2) for x in add_bin(sample1, sample2)])
		return input, output

	def __len__(self):
		return self.seq_length - self.num_bits + 1

In [56]:

data = BinaryDataset(128, 5)
test_dataloader = DataLoader(dataset=data, batch_size=128)

print(data.__getitem__(0))


(tensor([[0., 1.],
        [1., 1.],
        [1., 1.],
        [0., 0.],
        [0., 0.]]), tensor([1, 0, 1, 1, 0]))


In [57]:
# Define the model
class Model(nn.Module):
	def __init__(self) -> None:
		super().__init__()
		self.n_layers = 1
		self.hidden_dim = 10

		self.rnn = nn.RNN(input_size=2, hidden_size=self.hidden_dim, num_layers=self.n_layers, nonlinearity='relu')
		self.fc = nn.Linear(self.hidden_dim, 1)

	def forward(self, x):
		batch_size = x.size(1)
		hidden = self.init_hidden(batch_size)

		# X is of size (batch_size, sequence_length, input_size)
		# hidden is of size (n_layers, sequence_length, hidden_dim)
		
		out, hidden = self.rnn(x, hidden)

		out = self.fc(out)

		out = nn.Sigmoid()(out)

		return out, hidden

	def init_hidden(self, batch_size):
		# This method generates the first hidden state of zeros which we'll use in the forward pass
		# We'll send the tensor holding the hidden state to the device we specified earlier as well
		hidden = torch.zeros(self.n_layers, batch_size, self.hidden_dim)
		return hidden

In [58]:
# Create the model
model = Model()

# Define the loss function and optimizer
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Test the model's accuracy
def compute_accuracy(label, pred):
	pred = torch.round(pred).squeeze()
	# Pred is of size (batch_size, sequence_length)
	# Label is of size (batch_size, sequence_length)
	return torch.sum(label == pred).item() / (len(label) * len(label[0]))


# Try the model on random data
for x, label in iter(test_dataloader):
	out, hidden = model(x)
	print(compute_accuracy(label, out))



0.4612903225806452


In [60]:

# Train the model
num_epochs = 500



for epoch in range(num_epochs):
	for x, label in iter(test_dataloader):
		# Forward pass
		out, hidden = model(x)
		loss = criterion(out.squeeze(), label.float())

		# Backward and optimize
		optimizer.zero_grad()
		loss.backward()
		optimizer.step()

		# Compute accuracy
		accuracy = compute_accuracy(label, out)

		if epoch % 50 == 0:
			print(f"Epoch [{epoch}/{num_epochs}], Loss: {loss.item():.4f}, Accuracy: {accuracy * 100:.2f}%")


Epoch [0/500], Loss: 0.6784, Accuracy: 56.13%
Epoch [50/500], Loss: 0.6712, Accuracy: 52.42%
Epoch [100/500], Loss: 0.6628, Accuracy: 58.23%
Epoch [150/500], Loss: 0.6525, Accuracy: 60.97%
Epoch [200/500], Loss: 0.6393, Accuracy: 68.06%
Epoch [250/500], Loss: 0.6241, Accuracy: 72.74%
Epoch [300/500], Loss: 0.6074, Accuracy: 74.68%
Epoch [350/500], Loss: 0.5874, Accuracy: 75.32%
Epoch [400/500], Loss: 0.5663, Accuracy: 76.61%
Epoch [450/500], Loss: 0.5473, Accuracy: 76.29%
