In [1]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt # for making figures
%matplotlib inline

In [3]:
# read in all the words: names.txt holds one name per line
# (a context manager closes the file handle as soon as the text has been read,
# instead of leaving it open until the garbage collector gets to it)
with open('names.txt', 'r') as names_file:
    words = names_file.read().splitlines()
words[:8]

['emma', 'olivia', 'ava', 'isabella', 'sophia', 'charlotte', 'mia', 'amelia']

In [4]:
# how many lines (names) were read from names.txt
len(words)

32033

In [5]:
# character vocabulary and the two lookup dicts:
#   stoi: character -> integer index, itos: integer index -> character
chars = sorted(set(''.join(words)))
stoi = {ch: idx for idx, ch in enumerate(chars, start=1)}  # letters are numbered from 1 ...
stoi['.'] = 0  # ... because index 0 is reserved for the special '.' character
itos = dict(zip(stoi.values(), stoi.keys()))  # inverse mapping, same insertion order as stoi
print(itos)

{1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e', 6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j', 11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o', 16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't', 21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z', 0: '.'}


In [None]:
# build the dataset: every example pairs a window of `block_size` character
# indices (a row of X) with the index of the character that follows it (Y)

block_size = 3  # context length: number of preceding characters used to predict the next one

X, Y = [], []
for w in words[:5]:  # first five names only, which keeps the printed trace short
    print(w)
    context = [0] * block_size  # start with a window made entirely of '.' (index 0)
    # the appended '.' makes the end of the name a prediction target as well
    for ix in map(stoi.__getitem__, w + '.'):
        X.append(context)
        Y.append(ix)
        print(''.join(map(itos.__getitem__, context)), '---->', itos[ix])
        # slide the window: drop the oldest index, append the newest;
        # this builds a new list, so rows already stored in X are never mutated
        context = [*context[1:], ix]

X, Y = torch.tensor(X), torch.tensor(Y)

emma
... ----> e
..e ----> m
.em ----> m
emm ----> a
mma ----> .
olivia
... ----> o
..o ----> l
.ol ----> i
oli ----> v
liv ----> i
ivi ----> a
via ----> .
ava
... ----> a
..a ----> v
.av ----> a
ava ----> .
isabella
... ----> i
..i ----> s
.is ----> a
isa ----> b
sab ----> e
abe ----> l
bel ----> l
ell ----> a
lla ----> .
sophia
... ----> s
..s ----> o
.so ----> p
sop ----> h
oph ----> i
phi ----> a
hia ----> .


In [7]:
# sanity check on the dataset tensors: X holds one row of block_size context
# indices per example, Y holds one target index per example
X.shape, X.dtype, Y.shape, Y.dtype

(torch.Size([32, 3]), torch.int64, torch.Size([32]), torch.int64)

In [9]:
# Lookup table C: one 2-dimensional vector per character index
# (27 rows = the 26 letters plus '.').
# The values come from a dedicated, explicitly seeded generator so that
# Restart & Run All reproduces the same table; drawing from the unseeded
# global RNG produced a different C on every run.
C = torch.randn((27, 2), generator=torch.Generator().manual_seed(2147483647))
C

tensor([[ 2.5192, -0.7479],
        [ 0.5904, -0.7855],
        [-0.2911, -0.1022],
        [ 1.0891, -0.0584],
        [-1.5367,  2.6640],
        [-0.3922,  0.6396],
        [ 1.5428, -0.1711],
        [-0.4639, -0.8532],
        [-0.0945,  1.3032],
        [ 0.3414,  0.4925],
        [-1.0260, -1.4117],
        [-0.2255,  1.4623],
        [-0.4746,  0.1437],
        [-1.0757,  0.2658],
        [-1.4127,  2.2985],
        [ 0.4602,  0.9911],
        [ 0.1988, -2.0633],
        [-1.2186,  0.2204],
        [ 0.1185, -0.3875],
        [-1.9526,  0.4400],
        [-0.6413,  1.7736],
        [-1.0451, -2.1577],
        [ 2.7284,  1.2501],
        [ 0.2755,  1.6195],
        [-1.3268,  1.3963],
        [-0.2454, -0.9928],
        [ 0.9465, -0.3431]])

In [10]:
# plain integer indexing: row 5 of C is the 2-d vector stored for character index 5 ('e' in itos)
C[5]

tensor([-0.3922,  0.6396])

In [13]:
# F.one_hot yields an integer (int64) tensor, so it has to be cast to float
# before it can be matrix-multiplied with the float table C
F.one_hot(torch.tensor(5),num_classes=27).dtype

torch.int64

In [None]:
# A one-hot row vector times C keeps only the row of C where the 1 sits, so this
# product equals C[5]. That equivalence is why C is called a 'lookup table':
# indexing into C and multiplying a one-hot vector by C give the same result.
F.one_hot(torch.tensor(5),num_classes=27).float() @ C

tensor([-0.3922,  0.6396])

In [20]:
# index C with the whole integer tensor X at once: every index in X is replaced
# by its 2-d row of C, so the result has X's shape plus a trailing dimension of 2
C[X].shape

torch.Size([32, 3, 2])