In [1]:
%load_ext autoreload
%autoreload 2

import bayes_pytorch_local_reparam as bayes

import torch
from torch import Tensor as T
import torch.nn as nn
from torch import autograd
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.nn.functional as F
from tqdm import tqdm_notebook


import math
import numpy as np

import os

In [2]:
# Index of the CUDA device every later `.cuda()` call in this notebook will use.
CUDA_DEVICE_INDEX = 1
torch.cuda.set_device(CUDA_DEVICE_INDEX)

### CIFAR-10

In [3]:
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

batch_size = 128
n_epochs = 1

# Per-channel mean / std handed to `transforms.Normalize`
# (presumably the CIFAR-10 training-set statistics -- TODO confirm).
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD = (0.2023, 0.1994, 0.2010)

print('==> Preparing data..')
normalize = transforms.Normalize(CIFAR10_MEAN, CIFAR10_STD)

# Training images get random crop + horizontal flip augmentation;
# test images are only converted to tensors and normalised.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])
transform_test = transforms.Compose([transforms.ToTensor(), normalize])

train_set = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
test_set = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=2)
test_loader = DataLoader(test_set, batch_size=100, shuffle=False, num_workers=2)

==> Preparing data..
Files already downloaded and verified
Files already downloaded and verified


In [4]:
# Sanity check: number of examples in the training dataset.
len(train_loader.dataset)

50000

In [5]:
def evaluate(model, test_loader):
    """Return the top-1 accuracy of ``model`` over ``test_loader``.

    The model is switched to eval mode and is left in that mode on return.
    Every batch is moved to the current CUDA device before the forward pass.

    Args:
        model: network whose output has one score per class along dim 1.
        test_loader: iterable of ``(inputs, targets)`` batches.

    Returns:
        float: fraction of samples whose arg-max prediction equals the target.

    Raises:
        ZeroDivisionError: if ``test_loader`` yields no samples.
    """
    n_correct = 0
    n_test_samples = 0
    model.eval()
    for test_data, test_target in test_loader:
        test_data, test_target = test_data.cuda(), test_target.cuda()
        # `volatile=True` is how PyTorch <= 0.3 disables graph construction for
        # inference; later releases accept the flag but ignore it (with a warning).
        test_data = autograd.Variable(test_data, volatile=True)
        output = model(test_data).data
        _, argmax = output.max(1)
        # `.sum()` yields a Python int on old PyTorch and a 0-dim integer tensor on
        # newer ones.  Converting per batch keeps the running total a plain int, so
        # the final division is an ordinary float division on every version
        # (an integer tensor would fail or truncate on the in-place `/=`).
        n_correct += int(test_target.eq(argmax).sum())
        n_test_samples += test_target.size(0)

    return n_correct / n_test_samples
    
    
def save(model, path):
    """Write ``model.state_dict()`` to ``checkpoint/<path>.t7``.

    ``path`` is the checkpoint's base name (no directory, no extension).
    The ``checkpoint`` directory is created in the current working
    directory when it does not exist yet.
    """
    print('Saving..')
    os.makedirs('checkpoint', exist_ok=True)
    torch.save(model.state_dict(), 'checkpoint/{}.t7'.format(path))
    
    
def load_model(basic_model, path):
    """Restore ``basic_model`` in place from ``./checkpoint/<path>.t7``.

    The file is expected to hold a state dict as written by ``save``.
    After loading, the model is moved to the current CUDA device.
    Returns ``None``.
    """
    checkpoint_file = './checkpoint/{}.t7'.format(path)
    basic_model.load_state_dict(torch.load(checkpoint_file))
    basic_model.cuda()
    
    
def _log_parameter_histograms(logger, model, step):
    """Log histograms of the ``*_mu`` / ``*logsigma*`` parameters (and their gradients)."""
    for tag, value in model.named_parameters():
        if 'logsigma' in tag:
            # Show sigma itself rather than its logarithm.
            shown = torch.exp(value)
            tag = tag.replace('logsigma', 'sigma')
        elif '_mu' in tag:
            shown = value
        else:
            continue

        tag = tag.replace('.', '/')
        logger.histo_summary(tag, shown.data.cpu().numpy(), step)
        # A trainable parameter that took no part in the last backward pass has
        # `grad is None`; skip it instead of failing on `None.data`.
        if value.requires_grad and value.grad is not None:
            logger.histo_summary(tag + '/grad', value.grad.data.cpu().numpy(), step)


def train(train_loader, test_loader, model, optimizer, scheduler=None, start_epoch=0, stop_epoch=20):
    """Train ``model`` with cross-entropy loss and evaluate it after every epoch.

    For each epoch in ``range(start_epoch, stop_epoch)`` the function steps
    ``scheduler`` (if given), runs one pass over ``train_loader`` on the current
    CUDA device, calls ``evaluate(model, test_loader)`` and prints a summary line.
    Scalars and histograms are written through ``logger.Logger('./logs')``.

    Args:
        train_loader: iterable of ``(inputs, targets)`` training batches.
        test_loader: loader handed to ``evaluate`` after each epoch.
        model: network to optimise; its output is fed to ``F.cross_entropy``.
        optimizer: optimizer already bound to the model's parameters.
        scheduler: optional LR scheduler, stepped once at the start of each epoch.
        start_epoch: first epoch index (inclusive).
        stop_epoch: last epoch index (exclusive).

    Returns:
        None.
    """
    from logger import Logger
    logger = Logger('./logs')

    n_epochs = stop_epoch - start_epoch
    n_steps = 0
    for epoch in tqdm_notebook(range(start_epoch, stop_epoch), desc='epochs', total=n_epochs):

        # ---- train for one epoch ----
        model.train()
        train_loss = 0
        n_train_samples = 0
        n_train_batches = 0
        if scheduler is not None:
            scheduler.step()
        for data, target in tqdm_notebook(train_loader, leave=False):
            data, target = data.cuda(), target.cuda()
            data, target = autograd.Variable(data), autograd.Variable(target)

            optimizer.zero_grad()
            # One forward pass per batch.
            output = model(data)
            loss = F.cross_entropy(output, target)
            loss.backward()
            optimizer.step()

            # Histograms are written on batches 1, 101, 201, ... of every epoch,
            # all under the epoch index as the step.
            if n_train_batches % 100 == 1:
                _log_parameter_histograms(logger, model, epoch)

            # NOTE(review): `loss.data[0]` is the pre-0.4 PyTorch way of reading a
            # scalar loss; newer releases need `loss.item()` -- confirm target version.
            batch_loss = loss.data[0]
            train_loss += batch_loss
            n_train_samples += target.size(0)
            n_train_batches += 1
            n_steps += 1

            logger.scalar_summary('loss', batch_loss, n_steps)

        # Mean of the per-batch losses over the epoch.
        train_loss /= n_train_batches

        # ---- evaluate ----
        test_accuracy = evaluate(model, test_loader)

        # ---- report ----
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tTest acc: {:.3f}'.format(
              epoch, n_train_samples, len(train_loader.dataset),
              100. * n_train_batches / len(train_loader), train_loss,
              test_accuracy))

        # Per-epoch scalars for TensorBoard.
        info = {
            'accuracy': test_accuracy
        }

        for tag, value in info.items():
            logger.scalar_summary(tag, value, epoch)
        
        
def lr_lambda(epoch):
    """Step-decay multiplier for ``lr_scheduler.LambdaLR``.

    Args:
        epoch: epoch counter supplied by the scheduler.

    Returns:
        ``1`` for ``epoch < 150``, ``0.1`` for ``150 <= epoch < 250`` and
        ``0.01`` from epoch 250 onwards.

    The last stage is open-ended on purpose: previously any ``epoch >= 350``
    fell through to the default multiplier of 1, which would jump the learning
    rate back to its initial value as soon as the counter reached 350.
    """
    if epoch < 150:
        return 1
    if epoch < 250:
        return 0.1
    return 0.01

In [None]:
# Baseline network built from bayes.BasicBlock with random_weights=False.
resnet101 = bayes.ResNet(bayes.BasicBlock, [3, 4, 23, 3], random_weights=False)
resnet101.cuda()

parameters = [p for p in resnet101.parameters() if p.requires_grad]
# optimizer = optim.Adam(parameters, lr=0.01)

optimizer = optim.SGD(parameters, lr=0.1, momentum=0.9, weight_decay=5e-4)
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda)

# train() takes start_epoch / stop_epoch; it has no `n_epochs` keyword, so the
# 350-epoch run is requested through stop_epoch.
train(train_loader, test_loader, resnet101, optimizer, scheduler=scheduler, stop_epoch=350)
save(resnet101, 'resnet101_epoch=350')

In [None]:
# Rebuild the same architecture and restore the weights written to
# checkpoint/resnet101_epoch=350.t7 (load_model also moves the model to the GPU).
resnet101_loaded = bayes.ResNet(bayes.BasicBlock, [3, 4, 23, 3], random_weights=False)
load_model(resnet101_loaded, 'resnet101_epoch=350')

## ResNet with random weights

In [None]:
# Alternative [3, 4, 23, 3] configuration, currently disabled:
# resnet_random = bayes.ResNet(bayes.RandomBasicBlock, [3, 4, 23, 3], random_weights=True)
# resnet_random.cuda()

# Active model: RandomBasicBlock network with layer configuration [2, 2, 2, 2].
resnet_random = bayes.ResNet(bayes.RandomBasicBlock, [2, 2, 2, 2], random_weights=True)
resnet_random.cuda()


# Optimise only the parameters that currently require gradients.
parameters = [p for p in resnet_random.parameters() if p.requires_grad]
optimizer = optim.SGD(parameters, lr=0.01, momentum=0.9, weight_decay=5e-4)
# optimizer = optim.Adam(parameters, lr=0.0001)

# Step-decay schedule defined by lr_lambda.
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda)

### Freeze the parameters of all conv layers

In [19]:
# Parameters whose name contains one of these markers are excluded from training.
FROZEN_NAME_MARKERS = ('W_mu', 'W_logstd')

for param_name, param in resnet_random.named_parameters():
    if any(marker in param_name for marker in FROZEN_NAME_MARKERS):
        param.requires_grad = False

# Rebuild the optimizer and scheduler over what is still trainable.
parameters = [param for param in resnet_random.parameters() if param.requires_grad]
optimizer = optim.SGD(parameters, lr=0.01, momentum=0.9, nesterov=True)
# optimizer = optim.Adam(parameters, lr=0.0001)

scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda)

In [None]:
# NOTE(review): no `scheduler=` argument is passed, so train() never steps a
# scheduler and the learning rate stays at the optimizer's initial value for
# the whole run -- confirm this is intentional.
train(train_loader, test_loader, resnet_random, optimizer, start_epoch=0, stop_epoch=350)









Process Process-18:
KeyboardInterrupt
Process Process-17:
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/yeahrmek/miniconda3/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/home/yeahrmek/miniconda3/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/home/yeahrmek/miniconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/yeahrmek/miniconda3/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/home/yeahrmek/miniconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 34, in _worker_loop
    r = index_queue.get()
  File "/home/yeahrmek/miniconda3/lib/python3.6/multiprocessing/queues.py", line 343, in get
    res = self._reader.recv_bytes()
  File "/home/yeahrmek/miniconda3/lib/python3.6/site-packages/torch/utils/data/dataload

KeyboardInterrupt: 

In [None]:
# The random-weight model is bound to `resnet_random` in this notebook;
# `resnet101_random` is never defined and raised NameError on a fresh kernel.
test_accuracy = evaluate(resnet_random, test_loader)
print(test_accuracy)

In [None]:
# Inspect conv1's W_logstd after exp followed by sigmoid.
# Uses `resnet_random`, the name the model is actually bound to
# (`resnet101_random` is never defined in this notebook).
torch.sigmoid(torch.exp(resnet_random.conv1.W_logstd))

In [None]:
# Save the model under the name it is actually bound to (`resnet_random`);
# `resnet101_random` is never defined in this notebook.
# NOTE(review): the checkpoint name ends in 'epoch=' without an epoch number --
# confirm the intended file name.
save(resnet_random, 'resnet101_random_epoch=')

In [None]:
def eval(model, test_loader):
    """Print the top-1 accuracy of ``model`` over ``test_loader``.

    Same computation as ``evaluate``, but the result is printed as
    ``Test acc: <value>`` instead of being returned.  The model is switched to
    eval mode and batches are moved to the current CUDA device.

    NOTE: this name shadows the builtin ``eval`` for the rest of the notebook;
    it is kept unchanged so existing calls continue to work.

    Args:
        model: network whose output has one score per class along dim 1.
        test_loader: iterable of ``(inputs, targets)`` batches.

    Returns:
        None.
    """
    n_correct = 0
    n_test_samples = 0
    model.eval()
    for test_data, test_target in test_loader:
        test_data, test_target = test_data.cuda(), test_target.cuda()
        test_data = autograd.Variable(test_data, volatile=True)
        output = model(test_data).data
        _, argmax = output.max(1)
        # Keep the running count a plain int regardless of whether `.sum()`
        # returns a Python number or a 0-dim tensor.
        n_correct += int(test_target.eq(argmax).sum())
        n_test_samples += test_data.size()[0]
    # The label previously read 'tTest acc' (a stray 't' in front of 'Test').
    print('Test acc: {:.3f}'.format(n_correct / n_test_samples))

In [None]:
# Rebuild the [3, 4, 23, 3] random-weight network and restore its checkpoint.
resnet101_random_loaded = bayes.ResNet(bayes.RandomBasicBlock, [3, 4, 23, 3], random_weights=True)

load_model(resnet101_random_loaded, 'resnet101_random_epoch=350')
# No function named `test` exists in this notebook; report accuracy via evaluate().
print(evaluate(resnet101_random_loaded, test_loader))

In [None]:
# Neither `test` nor `resnet101_random` is defined in this notebook; evaluate the
# in-memory random-weight model (`resnet_random`) with evaluate() instead.
print(evaluate(resnet_random, test_loader))

In [None]:
# Re-create the BasicBlock network (rebinding `resnet101`) and restore the
# weights stored in checkpoint/resnet101_epoch=350.t7.
resnet101 = bayes.ResNet(bayes.BasicBlock, [3, 4, 23, 3], random_weights=False)

load_model(resnet101, 'resnet101_epoch=350')

In [None]:
# For every W_logstd parameter, print the first entry (along dim 0) of exp(W_logstd).
# Iterates over `resnet_random`, the name the random-weight model is bound to
# (`resnet101_random` is never defined in this notebook).
for name, p in resnet_random.named_parameters():
    if 'W_logstd' in name:
        std = torch.exp(p).data.cpu().numpy()[0]
        print(std)

In [None]:
# List every parameter name of the baseline model; for names containing
# 'weight' also print the standard deviation of the parameter's values.
for param_name, param in resnet101.named_parameters():
    print(param_name)
    if 'weight' not in param_name:
        continue
    weight_std = param.std().data.cpu().numpy()[0]
    print(weight_std)