In [1]:
import random
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader


In [2]:
# Utils
def make_random_bin(length):
	"""Return a random string of `length` characters, each "0" or "1".

	One `random.randint(0, 1)` draw is made per character, so the result is
	reproducible after `random.seed(...)`.
	"""
	bits = []
	for _ in range(length):
		bits.append(str(random.randint(0, 1)))
	return "".join(bits)

def add_bin(a: str, b: str):
	"""Add two binary numbers written least-significant-bit first.

	Args:
		a: First operand, a non-empty string of "0"/"1" with the LSB at index 0.
		b: Second operand, same encoding as `a`.

	Returns:
		The sum as an LSB-first string, zero-padded to the length of the wider
		operand. If the sum needs more bits than that, the result is longer.

	Raises:
		ValueError: If either operand is empty or contains non-binary characters.
	"""
	# Reverse to the conventional MSB-first order before parsing.
	total = int(a[::-1], 2) + int(b[::-1], 2)
	# Pad to the wider operand; padding to len(a) alone truncated the result
	# width whenever b was the longer string.
	width = max(len(a), len(b))
	return format(total, "b").zfill(width)[::-1]


Create the dataset

In [3]:
class BinaryDataset(Dataset):
	"""Sliding-window dataset of binary addition examples.

	Two random bit strings of length `seq_length` are generated once. Sample
	`i` takes the `num_bits - 1` characters starting at position `i` from each
	string and appends a "0", so both operands are `num_bits` wide and their
	sum always fits in `num_bits` bits. Bits are treated as least-significant
	first (the appended "0" is the most significant bit).
	"""

	def __init__(self, seq_length, num_bits):
		self.seq_length = seq_length
		self.num_bits = num_bits
		self.seq1 = make_random_bin(seq_length)
		self.seq2 = make_random_bin(seq_length)

	def __getitem__(self, index):
		"""Return `(features, target)` for window `index`.

		`features` is a float tensor of shape `(num_bits, 2)` holding the two
		operand bits per position; `target` is an integer tensor of the
		`num_bits` sum bits.

		Raises:
			IndexError: If `index` is outside `[-len(self), len(self))`.
		"""
		size = len(self)
		if index < 0:
			index += size
		# Without this check slicing silently yields short samples and plain
		# iteration (`for item in dataset`) never terminates.
		if not 0 <= index < size:
			raise IndexError("BinaryDataset index out of range")

		stop = index + self.num_bits - 1
		sample1 = self.seq1[index:stop] + "0"
		sample2 = self.seq2[index:stop] + "0"
		features = torch.tensor([[int(bit) for bit in sample1], [int(bit) for bit in sample2]])
		features = torch.transpose(features, 0, 1).float()
		target = torch.tensor([int(bit) for bit in add_bin(sample1, sample2)])
		return features, target

	def __len__(self):
		# Kept as seq_length - num_bits + 1 for compatibility (the very last
		# possible window is left unused); clamped so len() never sees a
		# negative value when num_bits is larger than the sequence.
		return max(0, self.seq_length - self.num_bits + 1)

In [4]:
# One pair of 128-bit random operands, sampled through a 5-bit sliding window.
data = BinaryDataset(seq_length=128, num_bits=5)
# batch_size=128 is larger than len(data), so every epoch is a single batch.
test_dataloader = DataLoader(data, batch_size=128)
# Show the first (input, target) pair as a sanity check.
print(data[0])

(tensor([[1., 1.],
        [1., 1.],
        [0., 0.],
        [1., 0.],
        [0., 0.]]), tensor([0, 1, 1, 1, 0]))


In [21]:
# Define the model
class Model(nn.Module):
	"""Single-layer ReLU RNN that emits one probability per time step.

	Inputs are batch-first, `(batch_size, sequence_length, 2)`, where the last
	axis holds the two operand bits for that position. The recurrence runs
	along the sequence axis so the hidden state can carry information (e.g. the
	carry bit) from one position to the next.
	"""

	def __init__(self) -> None:
		super().__init__()
		self.n_layers = 1
		self.hidden_dim = 10

		# batch_first=True is required: nn.RNN defaults to (seq, batch, feature),
		# which made the network recur over the batch axis instead of over bits.
		# The flag does not change parameter names, so saved state dicts still load.
		self.rnn = nn.RNN(
			input_size=2,
			hidden_size=self.hidden_dim,
			num_layers=self.n_layers,
			nonlinearity='relu',
			batch_first=True,
		)
		self.fc = nn.Linear(self.hidden_dim, 1)

	def forward(self, x):
		"""Run the network on a batch.

		Args:
			x: Float tensor of shape `(batch_size, sequence_length, 2)`.

		Returns:
			`(out, hidden)` where `out` has shape
			`(batch_size, sequence_length, 1)` with values in [0, 1] and
			`hidden` has shape `(n_layers, batch_size, hidden_dim)`.
		"""
		batch_size = x.size(0)
		hidden = self.init_hidden(batch_size, device=x.device, dtype=x.dtype)

		out, hidden = self.rnn(x, hidden)

		# Per-step logit squashed to a probability.
		out = torch.sigmoid(self.fc(out))

		return out, hidden

	def init_hidden(self, batch_size, device=None, dtype=None):
		"""Return an all-zero initial hidden state of shape
		`(n_layers, batch_size, hidden_dim)` on the requested device/dtype."""
		return torch.zeros(self.n_layers, batch_size, self.hidden_dim, device=device, dtype=dtype)

In [6]:
# Instantiate a fresh, untrained network
model = Model()

# Binary cross-entropy on the per-bit sigmoid outputs, optimised with Adam
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)

# Test the model's accuracy
def compute_accuracy(label, pred):
	"""Return the fraction of bits predicted correctly.

	Args:
		label: Tensor of target bits, e.g. shape `(batch_size, sequence_length)`.
		pred: Tensor of probabilities with the same number of elements as
			`label` (a trailing singleton axis is allowed).

	Returns:
		A float in [0, 1]: matching elements divided by `label.numel()`.
	"""
	# Reshape to the label's shape instead of a bare squeeze(): squeeze() drops
	# every size-1 axis, which made the comparison broadcast (and the score
	# exceed 1) when the batch or sequence length was 1.
	pred = torch.round(pred).reshape(label.shape)
	return torch.sum(label == pred).item() / label.numel()


# Baseline: accuracy of the untrained model on the dataset.
# no_grad() skips building an autograd graph that is never used here.
with torch.no_grad():
	for x, label in test_dataloader:
		out, hidden = model(x)
		print(compute_accuracy(label, out))



0.49193548387096775


In [9]:

# Train the model
num_epochs = 100

# Loss of every optimisation step, in order (useful for plotting afterwards)
all_losses = []

# Start at +inf so the first step always produces a checkpoint
best_loss = float("inf")

model.train()

for epoch in range(num_epochs):
	for x, label in test_dataloader:
		# Forward pass
		out, hidden = model(x)
		# Drop only the trailing feature axis so shapes stay (batch, seq)
		loss = criterion(out.squeeze(-1), label.float())

		# Backward pass
		optimizer.zero_grad()
		loss.backward()

		loss_value = loss.item()
		all_losses.append(loss_value)

		# Checkpoint BEFORE stepping: `loss` was produced by the current
		# weights, so these are the weights that actually achieved best_loss.
		if loss_value < best_loss:
			best_loss = loss_value
			torch.save(model.state_dict(), "best-model.bin")

		optimizer.step()

		# Compute accuracy
		accuracy = compute_accuracy(label, out)

		print(f"Epoch [{epoch}/{num_epochs}], Loss: {loss_value:.4f}, Accuracy: {accuracy * 100:.2f}%")


Epoch [0/100], Loss: 0.6979, Accuracy: 47.26%
Epoch [1/100], Loss: 0.6955, Accuracy: 47.26%
Epoch [2/100], Loss: 0.6937, Accuracy: 49.68%
Epoch [3/100], Loss: 0.6924, Accuracy: 51.13%
Epoch [4/100], Loss: 0.6915, Accuracy: 59.19%
Epoch [5/100], Loss: 0.6906, Accuracy: 52.74%
Epoch [6/100], Loss: 0.6900, Accuracy: 52.74%
Epoch [7/100], Loss: 0.6895, Accuracy: 52.74%
Epoch [8/100], Loss: 0.6891, Accuracy: 52.74%
Epoch [9/100], Loss: 0.6888, Accuracy: 52.74%
Epoch [10/100], Loss: 0.6886, Accuracy: 52.74%
Epoch [11/100], Loss: 0.6884, Accuracy: 52.74%
Epoch [12/100], Loss: 0.6882, Accuracy: 52.74%
Epoch [13/100], Loss: 0.6880, Accuracy: 52.74%
Epoch [14/100], Loss: 0.6878, Accuracy: 52.74%
Epoch [15/100], Loss: 0.6875, Accuracy: 52.74%
Epoch [16/100], Loss: 0.6872, Accuracy: 52.74%
Epoch [17/100], Loss: 0.6869, Accuracy: 52.74%
Epoch [18/100], Loss: 0.6864, Accuracy: 52.74%
Epoch [19/100], Loss: 0.6860, Accuracy: 52.74%
Epoch [20/100], Loss: 0.6855, Accuracy: 52.74%
Epoch [21/100], Loss: 0

In [28]:
# Use the best model to add two user-provided numbers

model = Model()

model.load_state_dict(torch.load("best-model.bin"))
model.eval()

# Get two numbers from the user
n1 = int(input("Enter the first number: "))
n2 = int(input("Enter the second number: "))
if n1 < 0 or n2 < 0:
    # bin() of a negative number carries a sign that the bit encoding below
    # cannot represent.
    raise ValueError("Only non-negative integers are supported")

# Convert the numbers to binary, zero-padded to a common width
width = max(n1.bit_length(), n2.bit_length(), 1)
num1 = format(n1, "b").zfill(width)
num2 = format(n2, "b").zfill(width)

# Reverse the numbers (LSB first) and append a zero so the final carry has a slot
num1 = num1[::-1] + "0"
num2 = num2[::-1] + "0"

# One row per operand; the second row must come from num2, not num1 again
numsT = torch.tensor([[int(bit) for bit in num1], [int(bit) for bit in num2]])

# Shape (1, sequence_length, 2): a batch containing a single sequence
numsT = numsT.unsqueeze(0).float().transpose(1, 2)

with torch.no_grad():
    out, hidden = model(numsT)

# Round the per-bit probabilities to hard 0/1 decisions
out = torch.round(out).reshape(-1)

# Convert the output to a list of bit characters (LSB first)
outbin = [str(int(x)) for x in out]

# Reverse to MSB-first and convert the output to a number
out = int("".join(outbin[::-1]), 2)

# Expected sum, LSB first, padded to the prediction width so that every
# predicted bit (including the high ones) is scored. Deliberately not stored
# in a variable named `sum`, which would shadow the builtin for later cells.
expected_bits = format(n1 + n2, "b")[::-1].ljust(len(outbin), "0")

# Fraction of bit positions where the model agrees with the true sum
correct = 0
for expected_bit, predicted_bit in zip(expected_bits, outbin):
    if expected_bit == predicted_bit:
        correct += 1
percent_correct = correct / len(expected_bits)

print(n1, "+", n2, "=", out, "(", percent_correct * 100, "% correct)")

2 + 3 = 5 ( 100.0 % correct)
