<a href="https://colab.research.google.com/github/ymohamedahmed/drbayes/blob/master/SIBDL_demo_group_13.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Machine Learning Seminars - Subspace Inference for Bayesian Deep Learning - Demo

*Reviewed by Chiara Campagnola, Yousuf Mohamed-Ahmed and Hannah Teufel*

In [6]:
!rm -rf drbayes
!git clone https://github.com/ymohamedahmed/drbayes.git

Cloning into 'drbayes'...
remote: Enumerating objects: 73, done.[K
remote: Counting objects: 100% (73/73), done.[K
remote: Compressing objects: 100% (56/56), done.[K
remote: Total 334 (delta 32), reused 42 (delta 15), pack-reused 261[K
Receiving objects: 100% (334/334), 11.60 MiB | 15.91 MiB/s, done.
Resolving deltas: 100% (102/102), done.


In [7]:
!pip install -e drbayes

Obtaining file:///content/drbayes
Collecting gpytorch>=0.1.0rc4
[?25l  Downloading https://files.pythonhosted.org/packages/0f/d0/96634a8ae84b08bd64709c1abd4f319a70f404967c598690bca8be143fb8/gpytorch-1.4.0.tar.gz (286kB)
[K     |████████████████████████████████| 286kB 10.2MB/s 
Collecting pyro-ppl==1.5.2
[?25l  Downloading https://files.pythonhosted.org/packages/79/4d/e45ff02364438ce8698ed70b1fbd9240f7c4f6e509fb90e9c04657f895b5/pyro_ppl-1.5.2-py3-none-any.whl (607kB)
[K     |████████████████████████████████| 614kB 18.6MB/s 
Collecting pyro-api>=0.1.1
  Downloading https://files.pythonhosted.org/packages/fc/81/957ae78e6398460a7230b0eb9b8f1cb954c5e913e868e48d89324c68cec7/pyro_api-0.1.2-py3-none-any.whl
Building wheels for collected packages: gpytorch
  Building wheel for gpytorch (setup.py) ... [?25l[?25hdone
  Created wheel for gpytorch: filename=gpytorch-1.4.0-py2.py3-none-any.whl size=477826 sha256=d542a01a2baf09e79d45c2316820a2e74a5bd43d348ee13b52f735f893b2d277
  Stored in direc

In [38]:
import sys
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import torch
import torch.utils.data
from torch.nn import functional as F
import torch.nn as nn
from torchvision import datasets, transforms

import seaborn as sns

import subspace_inference
import subspace_inference.utils as utils
from subspace_inference.posteriors import SWAG
from subspace_inference import models, losses, utils
from subspace_inference.models import MLP
from subspace_inference.visualization import plot_predictive
from subspace_inference.posteriors.proj_model import SubspaceModel
from tqdm import tqdm

import os

torch.backends.cudnn.benchmark = True
torch.manual_seed(1)
torch.cuda.manual_seed(1)
np.random.seed(1)

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [33]:
transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
        ])
train = datasets.QMNIST(root="../data",train=True, download=True,
                   transform=transform)
test = datasets.QMNIST('../data', train=False,download=True,
                   transform=transform)
train_loader = torch.utils.data.DataLoader(train,batch_size=512)

## Models

In [8]:
def train(model, loader, optimizer, criterion, lr_init=1e-2, epochs=3000, 
          swag_model=None, swag=False, swag_start=2000, swag_freq=50, swag_lr=1e-3,
          print_freq=100):
    
    for epoch in range(epochs):
        t = (epoch + 1) / swag_start if swag else (epoch + 1) / epochs
        lr_ratio = swag_lr / lr_init if swag else 0.05
        
        if t <= 0.5:
            factor = 1.0
        elif t <= 0.9:
            factor = 1.0 - (1.0 - lr_ratio) * (t - 0.5) / 0.4
        else:
            factor = lr_ratio

        lr = factor * lr_init
        utils.adjust_learning_rate(optimizer, lr)
        
        train_res = utils.train_epoch(loader, model, criterion, optimizer, cuda=False, regression=False)
        if swag and epoch > swag_start:
            swag_model.collect_model(model)
        
        if (epoch % print_freq == 0 or epoch == epochs - 1):
            print('Epoch %d. LR: %g. Loss: %.4f' % (epoch, lr, train_res['loss']))


In [11]:
wd = 0.
lr_init = 1e-2

model_cfg = models.ToyRegNet
criterion = losses.GaussianLikelihood(noise_var=1.)
criterion = F.cross_entropy
model_cfg.kwargs = {"dimensions":[20,20], "output_dim":10, "input_dim":28*28}
model = model_cfg.base(*model_cfg.args, **model_cfg.kwargs)
for i in range(2):
    print("Training Model", i)
    swag_model = SWAG(model_cfg.base, subspace_type="pca", *model_cfg.args, **model_cfg.kwargs, 
                  subspace_kwargs={"max_rank": 10, "pca_rank": 10})
    model = model_cfg.base(*model_cfg.args, **model_cfg.kwargs)
    optimizer = torch.optim.SGD(model.parameters(), lr=lr_init, momentum=0.95, weight_decay=wd)
    
    train(model, train_loader, optimizer, criterion, lr_init, 3000, print_freq=1000, 
          swag=True, swag_model=swag_model, swag_start=2000, swag_freq=10, swag_lr=1e-2)

Training Model 0


ModuleAttributeError: ignored

In [46]:

def train(model, loss_function, max_epochs, train_loader):
  optimiser = torch.optim.Adam(model.parameters(), lr=0.001)
  swag_model = SWAG(VanillaMLP, subspace_type="pca",
                  subspace_kwargs={"max_rank": 10, "pca_rank": 10},dims=[28*28,50,20,10])
  for epoch in range(max_epochs):
    total_loss = 0
    for x,y in train_loader:
      x = x.to(device)
      y = y.to(device)
      optimizer.zero_grad()
      # out = F.softmax(model(x.flatten()))
      out = model(x)
      # swag_model.collect_model(model)
      loss = loss_function(out,y)
      total_loss += loss
      loss.backward()
      optimizer.step()
    if epoch % 10 == 0:
        print(f"Epoch: {epoch}, loss: {loss}")

  return swag_model.get_space()

### Mean Field Variational Inference

### Ensembles

### Neural network + Bayesian Linear Regression

### _SIBDL_: PCA subspace

In [30]:
class VanillaMLP(nn.Module):
  def __init__(self, dims):
    super(VanillaMLP,self).__init__()
    layers = [nn.Flatten()] + [lay for (x,y) in zip(dims[:-1],dims[1:]) for lay in [nn.Linear(x,y), nn.ReLU()] ]
    layers.pop()
    
    self.model = nn.Sequential(*layers)

  def forward(self,x):
    return self.model(x)

In [15]:
model = VanillaMLP([28*28,50,20,10])
print(model)

VanillaMLP(
  (model): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=784, out_features=50, bias=True)
    (2): ReLU()
    (3): Linear(in_features=50, out_features=20, bias=True)
    (4): ReLU()
    (5): Linear(in_features=20, out_features=10, bias=True)
  )
)


In [35]:
model = VanillaMLP([28*28,50,20,10])
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
space = train(model, F.cross_entropy, 20, train_loader)
torch.save(model.state_dict(), "MLP.pt")

Epoch: 0, loss: 0.3758305609226227
Epoch: 10, loss: 0.06851270794868469


In [36]:
space

(tensor([-0.0278, -0.0378, -0.0119,  ...,  0.0468, -0.1616, -0.1590]),
 tensor([2.8196e-05, 2.8204e-05, 2.8196e-05,  ..., 5.7454e-05, 1.1363e-04,
         1.0563e-04]),
 tensor([[-1.6371e-03, -1.6370e-03, -1.6369e-03,  ..., -7.9936e-03,
           1.1685e-02, -1.0416e-02],
         [ 5.1491e-04,  5.1496e-04,  5.1497e-04,  ...,  3.8997e-04,
          -3.6317e-04, -1.1911e-04],
         [ 7.3030e-05,  7.2600e-05,  7.2626e-05,  ..., -1.7019e-04,
          -2.0231e-04,  3.5258e-05],
         ...,
         [ 1.1435e-05,  1.1249e-05,  1.1269e-05,  ..., -4.4610e-07,
          -1.8295e-06,  4.5624e-06],
         [-2.6892e-07, -1.1821e-06, -1.1737e-06,  ..., -1.9457e-07,
           3.0103e-06,  6.0429e-07],
         [ 3.8554e-07, -7.5043e-07, -7.3901e-07,  ...,  1.6949e-06,
           1.4114e-06,  8.9324e-07]]))

In [1]:
  model_cfg.args

NameError: ignored

In [47]:
from subspace_inference.posteriors.vi_model import VIModel, ELBO
import math
def get_pca_space():
    # swag_model = SWAG(model_cfg.base, subspace_type="pca", *model_cfg.args, **model_cfg.kwargs, 
    #               subspace_kwargs={"max_rank": 10, "pca_rank": 10})
    # print(torch.load("MLP.pt").keys())
    # swag_model.load_state_dict(torch.load("MLP.pt"))#["state_dict"])
    # mean, _, cov_factor = swag_model.get_space()
    mean, _, cov_factor = space
    subspace = SubspaceModel(mean, cov_factor)
    return subspace

subspace = get_pca_space()
init_sigma = 1.
prior_sigma = 5.
criterion = losses.GaussianLikelihood(noise_var=.05)
temperature = 1.

vi_model = VIModel(
    subspace=subspace,
    init_inv_softplus_sigma=math.log(math.exp(init_sigma) - 1.0),
    prior_log_sigma=math.log(prior_sigma),
    base=VanillaMLP,
    dims=[28*28,50,20,10]
)

elbo = ELBO(criterion, len(train_loader.dataset), temperature=temperature)
optimizer = torch.optim.Adam([param for param in vi_model.parameters()], lr=.1)
vi_model.to(device)
train(vi_model, criterion, 30, train_loader)
# for epoch in range(2000):
#     optimizer.zero_grad()
#     train_res = utils.train_epoch(loader, vi_model, elbo, optimizer, regression=True, cuda=False)
#     sigma = torch.nn.functional.softplus(vi_model.inv_softplus_sigma.detach().cpu())
#     if epoch % 1000 == 0 or epoch == 1999:
#         print(epoch, train_res)
#     if epoch == 1000:
#         utils.adjust_learning_rate(optimizer, 0.01)

TypeError: ignored

## _References_

- A very useful repository for a lot of Bayesian NN implementations: https://github.com/JavierAntoran/Bayesian-Neural-Networks
- The code for the paper is found at https://github.com/wjmaddox/drbayes and specifically the following notebook was adapted for this demonstration (https://github.com/wjmaddox/drbayes/blob/master/experiments/synthetic_regression/visualizing_uncertainty.ipynb)
