In [2]:
%load_ext autoreload
%autoreload 2

import os
import typing
import math

import numpy as np
import torch

import torch.optim
from matplotlib import pyplot as plt
from sklearn.metrics import roc_auc_score, average_precision_score
from torch import nn
from torch.nn import functional as F
from tqdm import trange

from util import ece, ParameterDistribution




The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
from solution import *
from torch.distributions.multivariate_normal import MultivariateNormal
from torch.distributions.normal import Normal

### UnivariateGaussian

In [4]:
# Scalar mean and standard deviation as 0-dim float64 tensors, shared by both
# distribution objects below.
mu = torch.tensor(2.0, dtype=torch.float64)
sigma = torch.tensor(5.0, dtype=torch.float64)

# Implementation under test (from `solution`) next to torch's built-in Normal
# built from the same parameters.
# NOTE(review): `normal_distr_torch` is not used in the cells visible here;
# presumably it is meant as a reference for comparison - confirm.
normal_distr = UnivariateGaussian(mu, sigma)
normal_distr_torch = Normal(mu, sigma)

In [5]:
# Draw a (1, 10000) batch of samples with mean `mu` and std `sigma`, then
# evaluate the custom distribution on it; the cell displays a single scalar tensor.
values = torch.normal(mu, sigma, size=(1, 10_000))
normal_distr.log_likelihood(values)


tensor(-30227.6816)

### MultivariateDiagonalGaussian

In [6]:
in_features = 3
out_features = 4

# Random (out_features, in_features) parameter matrices with entries in [0, 1).
# NOTE(review): `mu` rebinds the scalar defined in the UnivariateGaussian cell,
# so re-running that section afterwards would pick up this matrix instead.
mu = torch.rand((out_features, in_features))
rho = torch.rand((out_features, in_features))

# Sample once from the distribution and score that sample under it; the cell
# displays the resulting scalar tensor.
multivar_diag = MultivariateDiagonalGaussian(mu, rho)
w_sampled = multivar_diag.sample()
multivar_diag.log_likelihood(w_sampled)




tensor(-24.4566)

### Bayesian Layer

In [21]:
# Layer sizes reused by the following cells; `hidden_features` is passed to
# BayesNet further down.
in_features = 3
out_features = 4
hidden_features = (16, 8, 8)

# Centres of the random initialisation for the two parameter matrices.
posterior_mu_init = 0
posterior_rho_init = -7.0

# Trainable (out_features, in_features) matrices, filled in place with draws
# from a normal distribution centred on the init value with std 0.1.
weight_mu = nn.Parameter(
    torch.empty(out_features, in_features).normal_(mean=posterior_mu_init, std=0.1)
)
weight_rho = nn.Parameter(
    torch.empty(out_features, in_features).normal_(mean=posterior_rho_init, std=0.1)
)

In [22]:
# Build a single BayesianLayer from the sizes defined in the previous cell.
# NOTE(review): the positional `True` is presumably the bias flag - confirm
# against BayesianLayer's signature in `solution`.
layer = BayesianLayer(in_features, out_features, True)


In [23]:
batch_size = 16

# Random batch of shape (batch_size, in_features) pushed through the layer; the
# cell displays the returned tuple (output tensor plus two scalar tensors).
# NOTE(review): if BayesianLayer subclasses nn.Module, `layer(inputs)` is the
# usual call form because it also runs registered hooks - confirm.
inputs = torch.rand((batch_size, in_features))
layer.forward(inputs)




torch.Size([4, 3])
torch.Size([4])
tensor(-21.7486, grad_fn=<AddBackward0>) torch.Size([])
tensor(-31.1485, grad_fn=<AddBackward0>) torch.Size([])


(tensor([[-2.6316,  1.7399,  0.1818,  1.4296],
         [-2.2322,  1.4679,  0.5731,  0.7393],
         [-2.2283,  1.4332, -0.4103,  0.3894],
         [-1.6370,  1.3735,  0.4285,  0.9613],
         [-3.5842,  1.7121, -0.2501,  0.1370],
         [-1.6185,  1.4763,  0.4632,  1.4515],
         [-1.1835,  1.2317,  0.6357,  0.8804],
         [-2.6657,  1.5155,  0.4268,  0.4321],
         [-2.6861,  1.8028,  0.4160,  1.6978],
         [-3.2765,  1.9183,  0.3285,  1.5295],
         [-2.3489,  1.5723, -0.0137,  0.9578],
         [-3.6382,  1.8506,  0.2129,  0.7918],
         [-3.7019,  1.8915,  0.3153,  0.9239],
         [-1.5076,  1.2989, -0.0141,  0.6836],
         [-1.0024,  1.2089,  0.6373,  0.9836],
         [-2.5920,  1.8123, -0.1415,  1.7349]], grad_fn=<AddmmBackward>),
 tensor(-21.7486, grad_fn=<AddBackward0>),
 tensor(-31.1485, grad_fn=<AddBackward0>))

In [24]:
# Full network assembled from the input, output and hidden sizes defined in
# the Bayesian layer section.
bayes_net = BayesNet(
    in_features=in_features,
    out_features=out_features,
    hidden_features=hidden_features,
)

In [26]:
# Run the whole network on the same `inputs` batch. The displayed result is a
# 3-tuple: a (16, 4) output tensor followed by two scalar tensors.
# NOTE(review): the two scalars are presumably the log-prior and the
# log-variational-posterior - confirm against BayesNet.forward in `solution`.
bayes_net.forward(inputs)

(tensor([[ 18.5998, -15.2754,  -2.1326,  20.1545],
         [ 15.1028,   9.3524,   5.8433,   6.4324],
         [ 10.2882,  -7.9667,  -7.1372,  13.1421],
         [ 20.0697,   3.1555,  24.0921,  11.0891],
         [ 18.5405, -14.1652, -13.8131,  19.9326],
         [ 30.2897, -27.2383,  38.2905,  16.7900],
         [ 22.7071,   5.3116,  41.9271,   7.1599],
         [  7.7026,   0.6955,  -3.8975,   4.7778],
         [ 28.5747, -28.1307,  15.9455,  21.7918],
         [ 20.3746, -15.0860, -13.1918,  22.4705],
         [ 15.5982, -10.9015, -10.5652,  17.0264],
         [ 21.2267, -16.0375, -15.3160,  22.4705],
         [ 22.6897, -17.0195, -15.9610,  23.9374],
         [ 16.3782,   5.7304,   4.1247,   9.1891],
         [ 25.9157,  -4.8992,  45.7600,   9.8269],
         [ 18.0695, -18.2578,  -3.6295,  19.4580]], grad_fn=<AddmmBackward>),
 tensor(-67.9848, grad_fn=<AddBackward0>),
 tensor(-81.1299, grad_fn=<AddBackward0>))