In [1]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append('../')

import utils

import torch
from torch import Tensor as T
import torch.nn as nn
from torch import autograd
import torch.optim as optim
import torch.nn.functional as F
from tqdm import tqdm_notebook


import math
import numpy as np

In [3]:
# Make CUDA device 0 the current device for the `.cuda()` calls in later cells.
torch.cuda.set_device(0)

### MNIST

In [2]:
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

batch_size = 128  # mini-batch size passed to both the train and test DataLoader
n_epochs = 1  # NOTE(review): not read by any visible cell; `train` takes start_epoch/stop_epoch instead

In [4]:
class Net(nn.Module):
    """Small convolutional classifier with ten outputs.

    Layout: conv(1->64, 3x3) -> ReLU -> conv(64->128, 3x3) -> ReLU ->
    max-pool(2) -> flatten -> linear(18432->512) -> ReLU -> linear(512->10)
    -> softmax.

    `fc1` expects 128 * 12 * 12 features per sample, which is what a
    1x28x28 input yields after two unpadded 3x3 convolutions (28 -> 26 -> 24)
    and the 2x2 pooling (24 -> 12).
    """

    def __init__(self):
        super(Net, self).__init__()
        # Layers are created in the same order as they are used in forward().
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3)
        self.pool1 = nn.MaxPool2d(kernel_size=2)
        self.fc1 = nn.Linear(in_features=128 * 12 * 12, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=10)

    def forward(self, x):
        """Run a batch through the network and return the softmax of the `fc2` output."""
        # Convolutional feature extractor.
        features = F.relu(self.conv1(x))
        features = self.pool1(F.relu(self.conv2(features)))

        # Collapse everything except the batch dimension before the linear layers.
        flat = features.view(features.size(0), -1)

        hidden = F.relu(self.fc1(flat))
        scores = self.fc2(hidden)

        # NOTE(review): softmax is called without an explicit `dim`, exactly as
        # before; newer PyTorch releases warn about the implicit dimension.
        return F.softmax(scores)

In [5]:
# Training split: fetched with download=True and served with shuffle=True.
train_set = datasets.MNIST(
    root='../../RandomNet/data',
    train=True,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]),
)
train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)

# Test split: same preprocessing, no download flag, fixed order.
test_set = datasets.MNIST(
    root='../../RandomNet/data',
    train=False,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]),
)
test_loader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=False)

In [7]:
class SVMNet(Net):
    """`Net` variant whose two convolution layers have no bias term.

    The pooling layer and both fully connected layers are inherited from
    `Net` unchanged, so they are not re-created here.
    """

    def __init__(self):
        super(SVMNet, self).__init__()
        # Only the convolutions differ from the parent. Assigning to the same
        # attribute names replaces the parent's biased layers in place, so the
        # submodule order (conv1, conv2, pool1, fc1, fc2) is unchanged.
        self.conv1 = nn.Conv2d(1, 64, 3, bias=False)
        self.conv2 = nn.Conv2d(64, 128, 3, bias=False)

# Build the network and move its parameters to the GPU; `.cuda()` returns the
# module itself, so `model` is the same object as before.
model = SVMNet().cuda()
# Last expression of the cell: shows the module summary as the cell output.
model

SVMNet (
  (conv1): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), bias=False)
  (conv2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), bias=False)
  (pool1): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (fc1): Linear (18432 -> 512)
  (fc2): Linear (512 -> 10)
)

In [8]:
def train(train_loader, test_loader, model, optimizer, scheduler=None, start_epoch=0, stop_epoch=20,
          fix_random_weights=False):
    """Train `model` with `utils.svm_l1loss` and report test accuracy after every epoch.

    Parameters
    ----------
    train_loader : iterable of ``(data, target)`` batches; must support ``len()``
        and expose ``.dataset`` (both are used for the progress line).
    test_loader : passed unchanged to ``evaluate`` after each epoch.
    model : module called on each batch; ``model.fc2.weight`` is handed to the loss.
    optimizer : ``zero_grad()`` / ``step()`` are called once per batch.
    scheduler : optional; when given, ``step()`` is called once at the start of
        every epoch, before any batch is processed.
    start_epoch, stop_epoch : epochs run over ``range(start_epoch, stop_epoch)``.
    fix_random_weights : when True the model is put in ``eval()`` mode for the
        training pass instead of ``train()`` mode.

    Returns
    -------
    None. One progress line is printed per epoch.

    NOTE(review): `evaluate` is not defined in the visible cells; it has to
    exist in the notebook namespace before this function is called.
    """
    n_epochs = stop_epoch - start_epoch
    for epoch in tqdm_notebook(range(start_epoch, stop_epoch), desc='epochs', total=n_epochs):

        # train
        if fix_random_weights:
            model.eval()
        else:
            model.train()
        train_loss = 0
        n_train_samples = 0
        n_train_batches = 0
        if scheduler is not None:
            scheduler.step()
        for data, target in tqdm_notebook(train_loader, leave=False):
            data, target = data.cuda(), target.cuda()
            data, target = autograd.Variable(data), autograd.Variable(target)

            optimizer.zero_grad()
            output = model(data)
            loss = utils.svm_l1loss(output, target, model.fc2.weight, C=1, batch_fraction=1)
            loss.backward()
            optimizer.step()

            # `loss.data[0]` only works while a scalar loss is a 1-element
            # tensor; releases that return a 0-dim loss need `.item()`.
            # Prefer `.item()` when it exists and fall back to indexing.
            loss_data = loss.data
            train_loss += loss_data.item() if hasattr(loss_data, 'item') else loss_data[0]
            n_train_samples += target.size(0)
            n_train_batches += 1

        # An empty loader would otherwise raise ZeroDivisionError here.
        if n_train_batches > 0:
            train_loss /= n_train_batches

        # evaluate
        test_accuracy = evaluate(model, test_loader)

        # print progress
        n_total_batches = len(train_loader)
        percent_done = 100. * n_train_batches / n_total_batches if n_total_batches else 0.
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tTest acc: {:.3f}'.format(
              epoch, n_train_samples, len(train_loader.dataset),
              percent_done, train_loss,
              test_accuracy))

In [11]:
# Adadelta over every model parameter, then 20 epochs of training (0..19).
optimizer = optim.Adadelta(params=model.parameters(), lr=0.001, rho=0.95)
train(train_loader, test_loader, model, optimizer,
      start_epoch=0, stop_epoch=20)

torch.Size([128, 10])
torch.Size([128])
torch.Size([10, 512])



TypeError: mul received an invalid combination of arguments - got (torch.cuda.LongTensor), but expected one of:
 * (float value)
      didn't match because some of the arguments have invalid types: (torch.cuda.LongTensor)
 * (torch.cuda.FloatTensor other)
      didn't match because some of the arguments have invalid types: (torch.cuda.LongTensor)


In [90]:
# Print the first entry of every parameter whose name contains 'mu' or 'std'.
for name, p in model.named_parameters():
    holds_std = 'std' in name
    if not (holds_std or 'mu' in name):
        continue
    x = p.data.cpu().numpy()
    if holds_std:
        # Parameters matched by 'std' are shown exponentiated.
        x = np.exp(x)
    print(name, x[0])

conv1.W_mu 0.0764163
conv1.W_logstd 0.0099965
conv2.W_mu 0.051796
conv2.W_logstd 0.0100005


In [66]:
# NOTE(review): this call passes `n_epochs` and `random_weights`, but the `train`
# defined earlier in this notebook only accepts `scheduler`, `start_epoch`,
# `stop_epoch` and `fix_random_weights` as keywords. It presumably targeted an
# earlier version of `train`; confirm the intended arguments before re-running.
train(train_loader, test_loader, model, optimizer, n_epochs=20, random_weights=True)









KeyboardInterrupt: 

          178/|/ 38%|| 178/469 [00:16<00:06, 45.63it/s]