In [1]:
# Notebook display setup: with ast_node_interactivity set to "all", IPython
# shows the value of every top-level expression statement in a cell rather
# than only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# autoreload mode 2 re-imports modules before each cell executes, so edits to
# the local project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


import utils
from nets.NetOneLayer import NetOneLayer
from nets.NetOneLayerLowRank import NetOneLayerLowRank

In [3]:
# Batch sizes passed to the loader helper: the first for the training loader,
# the second for the test loader.
batch_size = 128
batch_size_test = 1000

# Project helper from utils; presumably returns iterable loaders over the
# MNIST train and test splits -- TODO confirm against utils.load_mnist.
train_loader, test_loader = utils.load_mnist(batch_size, batch_size_test)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Processing...
Done!


In [4]:
def train(model, train_loader, optimizer, epoch):
    """Run one optimisation pass over every batch of ``train_loader``.

    The model is switched to training mode, then for each batch the
    gradients are cleared, the cross-entropy between the model output and
    the targets is back-propagated, and the optimizer takes one step.
    A progress line is printed on every 200th batch (including the first).

    Args:
        model: callable module exposing ``train()``; maps a batch to scores.
        train_loader: iterable of ``(data, target)`` batches that also
            supports ``len()`` and has a ``dataset`` attribute with a length.
        optimizer: optimizer exposing ``zero_grad()`` and ``step()``.
        epoch: value shown in the progress line; not otherwise used.
    """
    log_every = 200
    model.train()

    for step, batch in enumerate(train_loader):
        inputs, labels = batch

        optimizer.zero_grad()
        batch_loss = F.cross_entropy(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()

        if step % log_every != 0:
            continue

        # Progress is estimated from the current batch size, so the sample
        # counter is approximate when the last batch is smaller.
        samples_seen = step * len(inputs)
        samples_total = len(train_loader.dataset)
        percent_done = 100. * step / len(train_loader)
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, samples_seen, samples_total, percent_done,
            batch_loss.item()))
            
def test(model, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [None]:
# Experiment configuration: RNG seed, SGD hyper-parameters and epoch count.
seed = 0
lr = 0.02
momentum = 0.9
n_epochs = 100

# Seed PyTorch's global RNG before the model is built so that parameter
# initialisation (and any shuffling that draws from the global generator)
# is repeatable across Restart & Run All.
torch.manual_seed(seed)

# NOTE(review): the meaning of d and K is defined in nets.NetOneLayerLowRank,
# which is not visible from this notebook -- confirm there.
model = NetOneLayerLowRank(n_hidden=2**8, d=2, K=2)
# Alternative model with the same hidden width, kept for comparison runs:
# model = NetOneLayer(n_hidden=2**8)
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

# One training pass followed by one evaluation on the test loader per epoch.
for epoch in range(n_epochs):
    train(model, train_loader, optimizer, epoch)
    test(model, test_loader)


Test set: Average loss: -0.2814, Accuracy: 2825/10000 (28%)


Test set: Average loss: -0.3347, Accuracy: 3379/10000 (34%)


Test set: Average loss: -0.4383, Accuracy: 4453/10000 (45%)


Test set: Average loss: -0.4901, Accuracy: 4947/10000 (49%)


Test set: Average loss: -0.5224, Accuracy: 5295/10000 (53%)


Test set: Average loss: -0.5427, Accuracy: 5490/10000 (55%)


Test set: Average loss: -0.5596, Accuracy: 5672/10000 (57%)


Test set: Average loss: -0.6054, Accuracy: 6179/10000 (62%)


Test set: Average loss: -0.6282, Accuracy: 6381/10000 (64%)


Test set: Average loss: -0.6397, Accuracy: 6489/10000 (65%)


Test set: Average loss: -0.6485, Accuracy: 6586/10000 (66%)

