In [1]:

from torch import tensor, use_deterministic_algorithms

from karpathy_series.makemore.components.neuro.sequence import Sequence
from karpathy_series.makemore.util import set_rand_karpathy

# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Ask torch to use deterministic algorithms so that re-runs are repeatable.
use_deterministic_algorithms(True)
# Project helper; presumably seeds the random generators for reproducible
# results -- TODO confirm against karpathy_series.makemore.util.
set_rand_karpathy()

In [2]:
# Toy batch of shape (2, 8, 3) filled with 0..47 so every element is unique
# and any mis-ordering caused by a reshape would be visible.
main_window = tensor(range(2 * 8 * 3)).reshape(2, 8, 3)

# Reference result: split dim 1 (size 8) into 4 groups of 2 with an explicit view.
a = main_window.view((2, 4, 2, 3))

# Same split expressed generically: expand dim=1 (the first one after the main
# batch dimension) into (-1, 2), letting torch infer the group count.
b = main_window.unflatten(1, (-1, 2))

# Both formulations must agree element for element.
assert bool(a.eq(b).all())

In [3]:
from operator import mul
from functools import reduce
# Work out by hand the target shape for moving a factor of `factor` out of
# dimension `dim` and folding it, together with every trailing dimension,
# into a single last dimension.
main_window = tensor(range(5*2*8*3*4)).view(5, 2, 8, 3, 4)

dim = 2
factor = 2

s = main_window.shape
pre = s[:dim]       # leading dimensions, left untouched
f = s[dim]          # size of the dimension being split
post = s[dim+1:]    # trailing dimensions, folded into the last output dim
print(f"{pre} - {f} - {post}")

# The floor division below would otherwise silently truncate and yield a shape
# with the wrong number of elements.
assert f % factor == 0, f"dimension {dim} of size {f} is not divisible by {factor}"

fp = f // factor                    # what remains in the split dimension
l = reduce(mul, post, 1)*factor     # size of the merged trailing dimension

print(fp, l)

v = pre + (fp, l)
print(v)

# The computed shape must be a valid view of the original tensor.
assert main_window.view(v).shape == v



torch.Size([5, 2]) - 8 - torch.Size([3, 4])
4 24
torch.Size([5, 2, 4, 24])


In [4]:
from karpathy_series.makemore.components.neuro.embedding import Embedding
from karpathy_series.makemore.components.neuro.expand import Expand
from karpathy_series.makemore.components.neuro.flatten import Flatten
from karpathy_series.makemore.components.neuro.functional import Tanh
from karpathy_series.makemore.components.neuro.linear import Linear

# Toy hyper-parameters for a shape walk-through of the grouped network.
code_size, in_dims = 5, 2
input_len, grouping_size = 15, 3
group_count = input_len // grouping_size
layer_2 = 4

# Each layer is annotated with the shape transformation it performs.
layers = [
    Embedding(code_size, in_dims),             # N[s, l] => R[s, l, d]
    Expand(1, grouping_size),                  # R[s, l, d] => R[s, g, c, d], l = gc
    Flatten(2),                                # R[s, g, c, d] => R[s, g, cd]
    Linear(grouping_size * in_dims, layer_2),  # R[s, g, cd] => R[s, g, v]
    Tanh(),                                    # Pointwise
    Flatten(2),                                # R[s, g, v] => R[s, gv]
    Linear(group_count * layer_2, 4),          # R[s, gv] => R[s, o]
]

# Two identical rows, each cycling through the codes 0..4 three times.
x_in = tensor([[0, 1, 2, 3, 4] * 3] * 2)

# Push the input through layer by layer, reporting the shape after each step.
x = x_in
print(x.shape)
for layer in layers:
    x = layer(x)
    print(x.shape, layer.describe())

torch.Size([2, 15])
torch.Size([2, 15, 2]) Embedding [5, 2]
torch.Size([2, 5, 3, 2]) Expand dim 1 into 3 sized batches
torch.Size([2, 5, 6]) Flatten last 2 dims
torch.Size([2, 5, 4]) Linear [6, 4] with bias
torch.Size([2, 5, 4]) Functional <built-in method tanh of type object at 0x7f1000f43ce0>
torch.Size([2, 20]) Flatten last 2 dims
torch.Size([2, 4]) Linear [20, 4] with bias


In [5]:
from karpathy_series.makemore.components.neuro.slide import Slide

# Same walk-through, with a single Slide layer doing the grouping step.
layers = [
    Embedding(code_size, in_dims),             # N[s, l] => R[s, l, d]
    Slide(1, grouping_size),                   # R[s, l, d] => R[s, g, cd], l = gc
    Linear(grouping_size * in_dims, layer_2),  # R[s, g, cd] => R[s, g, v]
    Tanh(),                                    # Pointwise
    Flatten(2),                                # R[s, g, v] => R[s, gv]
    Linear(group_count * layer_2, 4),          # R[s, gv] => R[s, o]
]

# Two identical rows, each cycling through the codes 0..4 three times.
x_in = tensor([[0, 1, 2, 3, 4] * 3] * 2)

# Push the input through layer by layer, reporting the shape after each step.
x = x_in
print(x.shape)
for layer in layers:
    x = layer(x)
    print(x.shape, layer.describe())

torch.Size([2, 15])
torch.Size([2, 15, 2]) Embedding [5, 2]
torch.Size([2, 5, 6]) Move a factor of 3 from 1 to 2
torch.Size([2, 5, 4]) Linear [6, 4] with bias
torch.Size([2, 5, 4]) Functional <built-in method tanh of type object at 0x7f1000f43ce0>
torch.Size([2, 20]) Flatten last 2 dims
torch.Size([2, 4]) Linear [20, 4] with bias


In [9]:
from karpathy_series.makemore.components.neuro.batch_norm import BatchNorm1d

# Hyper-parameters of the hierarchical model.
context_size = 8
charset_size = 27
embedding_dims = 10
hidden_dims = 200

# The stages are built in the same order in which they appear in the network,
# so any seeded parameter initialisation happens in the original sequence.

# N[b, 8n] => R[b, 8n, d]
embed_layers = [Embedding(charset_size, embedding_dims)]

# R[b, 8n, d] => R[b, 4n, 2d] => R[b, 4n, h]
pair_stage = [
    Slide(1, 2),
    Linear(2 * embedding_dims, hidden_dims),
    BatchNorm1d(hidden_dims, init_scale=0.01),
    Tanh(),
]

# R[b, 4n, h] => R[b, 2n, 2h] => R[b, 2n, h]
quad_stage = [
    Slide(1, 2),
    Linear(2 * hidden_dims, hidden_dims),
    BatchNorm1d(hidden_dims, init_scale=0.01),
    Tanh(),
]

# R[b, 2n, h] => R[b, n, 2h] => R[b, n, N], n is 1 for 8, we will assume
# NOTE(review): the last Linear emits embedding_dims features; if N is meant
# to be the per-character output size, charset_size may be intended -- confirm.
octet_head = [
    Slide(1, 2),
    Linear(2 * hidden_dims, embedding_dims),
    BatchNorm1d(embedding_dims, init_scale=0.01),
]

net = Sequence([*embed_layers, *pair_stage, *quad_stage, *octet_head])

print(net.describe())

Sequence [Embedding [27, 10], Move a factor of 2 from 1 to 2, Linear [20, 200] with bias, BatchNorm1d [200], Functional <built-in method tanh of type object at 0x7f1000f43ce0>, Move a factor of 2 from 1 to 2, Linear [400, 200] with bias, BatchNorm1d [200], Functional <built-in method tanh of type object at 0x7f1000f43ce0>, Move a factor of 2 from 1 to 2, Linear [400, 10] with bias, BatchNorm1d [10]]
