In [1]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable as V

import pyro
from pyro.distributions import Normal
from pyro.infer import SVI, Trace_ELBO
from pyro.optim import Adam
# for CI testing
smoke_test = ('CI' in os.environ)
pyro.enable_validation(True)

In [34]:
class RegressionModel(nn.Module):
    def __init__(self, indim, outdim):
        # p = number of features
        super(RegressionModel, self).__init__()
        self.conv1 = nn.Conv1d(1, 1, kernel_size=3, stride=1, padding=1)
        self.linear = nn.Linear(indim, outdim)

    def forward(self, x):
        y = self.conv1(x)
        print(y.size())
        y = self.linear(y)
        print(y.size())
        return y

regression_model = RegressionModel(10, 5)

In [3]:
regression_model

RegressionModel(
  (conv1): Conv1d(1, 1, kernel_size=(3,), stride=(1,), padding=(1,))
  (linear): Linear(in_features=10, out_features=5, bias=True)
)

In [41]:
L = nn.Linear(10, 5)
L(torch.rand(500,1,10)).size()

torch.Size([500, 1, 5])

In [45]:
N = 500  # size of toy data
indim = 10
outdim = 5

def build_linear_dataset(N, indim, outdim, noise_std=0.01):
    X = np.random.rand(N, indim)
    # w = 3
    w = 3 * np.ones(indim)
    # b = 1
    y = np.matmul(X, w) + np.repeat(1, N) + np.random.normal(0, noise_std, size=N)
    y = y.reshape(N, 1)
    X, y = torch.tensor(X).type(torch.Tensor), torch.tensor(y).type(torch.Tensor)
    data = torch.cat((X, y), 1)
    assert data.shape == (N, indim + 1)
    return data

In [48]:
torch.zeros(1,1,1)

tensor([[[0.]]])

In [57]:
def model(data):
    # Create unit normal priors over the parameters
    loc, scale = torch.zeros([1]*indim), 10 * torch.ones([1]*indim)
    bias_loc, bias_scale = torch.zeros(1), 10 * torch.ones(1)
    w_prior = Normal(loc, scale).independent(1)
    b_prior = Normal(bias_loc, bias_scale).independent(1)
    priors = {'linear.weight': w_prior, 'linear.bias': b_prior}
    # lift module parameters to random variables sampled from the priors
    lifted_module = pyro.random_module("module", regression_model, priors)
    print('yo')
    # sample a regressor (which also samples w and b)
    lifted_reg_model = lifted_module()
    with pyro.iarange("map", N):
        x_data = data[:, :-1].view(N,1,-1)
        y_data = data[:, -1]

        # run the regressor forward conditioned on data
        prediction_mean = lifted_reg_model(x_data).squeeze(-1)
        print(prediction_mean)
        # condition on the observed data
        pyro.sample("obs",
                    Normal(prediction_mean, 0.1 * torch.ones(data.size(0))),
                    obs=y_data)

In [50]:
softplus = torch.nn.Softplus()

def guide(data):
    # define our variational parameters
    w_loc = torch.randn(1, 1)
    # note that we initialize our scales to be pretty narrow
    w_log_sig = torch.tensor(-3.0 * torch.ones(1, 1) + 0.05 * torch.randn(1, 1))
    b_loc = torch.randn(1)
    b_log_sig = torch.tensor(-3.0 * torch.ones(1) + 0.05 * torch.randn(1))
    # register learnable params in the param store
    mw_param = pyro.param("guide_mean_weight", w_loc)
    sw_param = softplus(pyro.param("guide_log_scale_weight", w_log_sig))
    mb_param = pyro.param("guide_mean_bias", b_loc)
    sb_param = softplus(pyro.param("guide_log_scale_bias", b_log_sig))
    # guide distributions for w and b
    w_dist = Normal(mw_param, sw_param).independent(1)
    b_dist = Normal(mb_param, sb_param).independent(1)
    dists = {'linear.weight': w_dist, 'linear.bias': b_dist}
    # overload the parameters in the module with random samples
    # from the guide distributions
    lifted_module = pyro.random_module("module", regression_model, dists)
    # sample a regressor (which also samples w and b)
    return lifted_module()

In [58]:
optim = Adam({"lr": 0.05})
svi = SVI(model, guide, optim, loss=Trace_ELBO())

In [59]:
num_iterations = 1000 if not smoke_test else 2
def main():
    pyro.clear_param_store()
    data = build_linear_dataset(N, 10, 5)
    for j in range(num_iterations):
        # calculate the loss and take a gradient step
        loss = svi.step(data)
        if j % 100 == 0:
            print("[iteration %04d] loss: %.4f" % (j + 1, loss / float(N)))

if __name__ == '__main__':
    main()

yo
torch.Size([500, 1, 10])


RuntimeError: size mismatch, m1: [500 x 10], m2: [1 x 1] at /opt/conda/conda-bld/pytorch_1535493744281/work/aten/src/TH/generic/THTensorMath.cpp:2070