In [10]:
from IPython.core.interactiveshell import InteractiveShell
# Echo the value of every top-level expression in a cell, not just the last
# one (the scratch cell further down relies on this to show two results).
InteractiveShell.ast_node_interactivity = "all"

# Autoreload mode 2: re-import modules before each cell executes, so edits to
# the local `nets` package are picked up without restarting the kernel.
# NOTE(review): re-running this cell prints "already loaded"; `%reload_ext
# autoreload` is the variant IPython itself suggests for repeated execution.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [36]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data

from torchvision import datasets, transforms

from nets.NetOneLayer import NetOneLayer
from nets.NetOneLayerLowRank import NetOneLayerLowRank

In [3]:
# Mini-batch sizes handed to the two DataLoaders built in the next cell:
# the first for the training split, the second for the evaluation split.
batch_size, batch_size_test = 128, 1000

In [4]:
def _make_mnist_loader(is_train, n_per_batch, **mnist_kwargs):
    """Return a shuffled DataLoader over one MNIST split stored under '../data'.

    Each image is converted to a tensor and normalized with the constants
    (0.1307,) / (0.3081,). Extra keyword arguments (e.g. ``download``) are
    forwarded to ``datasets.MNIST``.
    """
    preprocessing = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])
    split = datasets.MNIST('../data', train=is_train,
                           transform=preprocessing, **mnist_kwargs)
    return torch.utils.data.DataLoader(split, batch_size=n_per_batch, shuffle=True)


# Only the training split requests a download; the test split is read from
# the same '../data' directory.
train_loader = _make_mnist_loader(True, batch_size, download=True)
test_loader = _make_mnist_loader(False, batch_size_test)

In [5]:
def train(model, train_loader, optimizer, epoch):
    """Run one optimisation pass over ``train_loader``.

    For every ``(data, target)`` batch the gradients are cleared, the
    cross-entropy between ``model(data)`` and ``target`` is back-propagated,
    and ``optimizer`` takes one step. A progress line is printed on every
    200th batch (starting with batch 0).

    Args:
        model: module to train; switched to training mode first.
        train_loader: iterable of ``(data, target)`` batches that also
            supports ``len()`` and exposes a ``dataset`` with ``len()``.
        optimizer: optimiser holding the model's parameters.
        epoch: value shown in the progress line (used for display only).
    """
    model.train()
    for step, batch in enumerate(train_loader):
        inputs, labels = batch

        optimizer.zero_grad()
        batch_loss = F.cross_entropy(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()

        if step % 200 != 0:
            continue

        # Progress is approximated as step * current batch length.
        seen = step * len(inputs)
        total = len(train_loader.dataset)
        percent = 100. * step / len(train_loader)
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, seen, total, percent, batch_loss.item()))
            
def test(model, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [None]:
# SGD settings and number of passes over the training set.
lr, momentum, n_epochs = 0.02, 0.9, 50

# Network under test; the commented alternative swaps in NetOneLayer with the
# same hidden width.
model = NetOneLayerLowRank(n_hidden=256, d=2, K=2)
# model = NetOneLayer(n_hidden=256)
optimizer = optim.SGD(params=model.parameters(), lr=lr, momentum=momentum)

# One training pass followed by one evaluation per epoch (epochs count from 0).
for epoch in range(0, n_epochs):
    train(model=model, train_loader=train_loader, optimizer=optimizer, epoch=epoch)
    test(model=model, test_loader=test_loader)


Test set: Average loss: -0.1719, Accuracy: 1751/10000 (18%)


Test set: Average loss: -0.2552, Accuracy: 2604/10000 (26%)


Test set: Average loss: -0.3296, Accuracy: 3418/10000 (34%)


Test set: Average loss: -0.3608, Accuracy: 3639/10000 (36%)


Test set: Average loss: -0.3744, Accuracy: 3763/10000 (38%)


Test set: Average loss: -0.3864, Accuracy: 3900/10000 (39%)


Test set: Average loss: -0.3943, Accuracy: 3956/10000 (40%)


Test set: Average loss: -0.4246, Accuracy: 4287/10000 (43%)


Test set: Average loss: -0.4522, Accuracy: 4565/10000 (46%)


Test set: Average loss: -0.4626, Accuracy: 4669/10000 (47%)


Test set: Average loss: -0.4724, Accuracy: 4764/10000 (48%)


Test set: Average loss: -0.4810, Accuracy: 4842/10000 (48%)


Test set: Average loss: -0.4845, Accuracy: 4890/10000 (49%)


Test set: Average loss: -0.4890, Accuracy: 4920/10000 (49%)


Test set: Average loss: -0.4929, Accuracy: 4964/10000 (50%)


Test set: Average loss: -0.4944, Accuracy: 4983/10000 (50%)


Test se

In [20]:
# Scratch check: pooling with a window as wide as the last dimension (4)
# leaves a single value per row, so the result has shape (2, 1, 1).
a = torch.rand((2, 1, 4))
a
F.max_pool1d(a, kernel_size=4).shape

tensor([[[0.8301, 0.2546, 0.5547, 0.2650]],

        [[0.4740, 0.0082, 0.6353, 0.9605]]])

torch.Size([2, 1, 1])

In [32]:
# NOTE(review): interactive help lookup left over from development; it only
# prints the nn.MaxPool2d docstring and can be dropped from the final notebook.
?nn.MaxPool2d

Init signature: nn.MaxPool2d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)
Docstring:
Applies a 2D max pooling over an input signal composed of several input
planes.

In the simplest case, the output value of the layer with input size :math:`(N, C, H, W)`,
output :math:`(N, C, H_{out}, W_{out})` and :attr:`kernel_size` :math:`(kH, kW)`
can be precisely described as:

.. math::

    \begin{equation*}
    \text{out}(N_i, C_j, h, w)  = \max_{m=0, \ldots, kH-1} \max_{n=0, \ldots, kW-1}
                           \text{input}(N_i, C_j, \text{stride}[0] * h + m, \text{stride}[1] * w + n)
    \end{equation*}

If :attr:`padding` is non-zero, then the inp