In [2]:
import torch
from torch import nn
from torch.nn import functional as F
from tqdm import tqdm

# Seed PyTorch's random number generator with a fixed value.
torch.manual_seed(1234)

# Use the GPU when CUDA reports one as available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

#### In this project, we will attempt to build a minimal GPT model which learns to add two integers, i.e. given the input string "a+b=", the model will be trained to predict the digit sequence "c", where c=a+b.

#### This is a simple next-character prediction task. We will attempt two different versions of this task: 1) the digits of "c" are predicted from left to right; 2) the digits are predicted from right to left (i.e. backward), which is typically how humans compute additions.


In [3]:
# Token vocabulary for the addition task.
# Besides the digits and the '+' / '=' symbols there are two special tokens:
#   '<*>'   denotes the beginning or end of a sequence
#   '<PAD>' is used for pre-padding sequences to ensure a fixed length
# NOTE(review): '10' is a two-character token, so character-wise encoding of a
# plain string can never produce it -- confirm whether it is actually needed.
vocab = sorted(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '+', '=', '<*>', '<PAD>'])
vocab_size = len(vocab)
print(f"Vocabulary: {vocab}")

# tokenization: lookup tables between tokens and their integer ids
ctoi = {tok: ix for ix, tok in enumerate(vocab)}  # token -> id
itoc = dict(enumerate(vocab))                     # id -> token


def encode(s):
    """Convert an iterable of tokens into a list of integer token ids.

    Iterating over a plain string yields single characters, so multi-character
    tokens such as '<*>' or '<PAD>' must be passed as elements of a list.

    Raises:
        KeyError: if a token is not in the vocabulary.
    """
    return [ctoi[c] for c in s]


def decode(s):
    """Convert an iterable of integer token ids into a list of token strings.

    The result is a list of tokens, not a joined string.

    Raises:
        KeyError: if an id is not in the vocabulary.
    """
    return [itoc[ix] for ix in s]


Vocabulary: ['+', '0', '1', '10', '2', '3', '4', '5', '6', '7', '8', '9', '<*>', '<PAD>', '=']


#### First, let's implement the data loader that generates input-target pairs.