In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

from autograd.tensor import Tensor
from autograd.tensor import Function
from autograd.datautil import fetch_mnist
from tqdm import tqdm
plt.style.use('seaborn-deep')
__plot__ = False

In [2]:
x = Tensor([.2, 1., 5.3], device='cpu', requires_grad=True)
w = Tensor.ones(3, device='cuda', requires_grad=True)

z = x.dot(w)
z.backward()

print(x.grad)
print(w.grad)
print(z)

[1. 1. 1.]
[0.2 1.  5.3]
<autograd.tensor.Tensor:
6.5 device=CPU _ctx=Dot grad=1.0>


In [10]:
class Net:
    def __init__(self):
        self.l1 = Tensor.uniform(784, 128, requires_grad=True)
        self.l2 = Tensor.uniform(128, 10, requires_grad=True)
    
    def __call__(self, x):
        return self.forward(x)

    def forward(self, x):
        x = x.dot(self.l1)
        x = x.relu()
        x = x.dot(self.l2)
        x = x.logsoftmax()
        return x
    


In [4]:
X_train, Y_train, X_test, Y_test = fetch_mnist()

http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz already exists, loading it...
http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz already exists, loading it...
http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz already exists, loading it...
http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz already exists, loading it...


In [5]:
if __plot__:
    G = 10
    fig, axs = plt.subplots(G, G, figsize=(20, 20))
    for x in range(G):
        for y in range(G):
            item = x*G + y
            axs[x, y].imshow(X_train[item, :])
            axs[x, y].axis('off')

In [28]:
# SGD training
batchsize = 128
eta = 0.001
epochs = 10
model = Net()

for epoch in tqdm(range(epochs)):
    samples = np.random.randint(0, X_train.shape[0], size=(batchsize))
    points = Tensor(X_train[samples].reshape((-1, 28*28)), requires_grad=True)
    Y = Y_train[samples]
    labels = np.zeros((batchsize, 10), np.float32)
    labels[range(batchsize), Y] = -1.0
    labels = Tensor(labels)
    
    output = model(points)
    output = output.mul(labels)
    output = output.mean()
    output.backward()
    
    loss = output.data
    print(loss)
    
    #print(model.l1.grad)
    #print(model.l2.grad)
    
    model.l2.data = model.l2.data - eta*model.l2.grad
    model.l1.data = model.l1.data - eta*model.l1.grad
    
    model.l1.grad = None
    model.l2.grad = None

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 78.26it/s]

[0.25800889]
[31479.45]
[0.24475594]
[0.23025851]
[0.23024199]
[0.23025851]
[0.23025851]
[0.23025851]
[0.23025851]
[0.23025851]



