In [None]:
# import standard libraries
from autograd import numpy as np
from autograd import grad
from autograd.misc.optimizers import adam, sgd
from autograd import scipy as sp
import autograd.numpy.random as npr
import pandas as pd
import numpy
import matplotlib.pyplot as plt
import sys
import time

# import our libraries
import bayes_helpers as bh
from utils import generate_data, run_toy_nn
from feed_forward import Feedforward
from nlm import NLM
from luna import LUNA

### Generate Cubic Dataset

Generates 100 datapoints for train and 100 points for test according to the function

$$y = \frac{1}{2}x^3 + \epsilon$$

$$\epsilon \sim N(0, 3^2)$$

In [2]:
x_train, y_train, x_test = generate_data()

### Define LUNA AND NLM Hyperameters

In [3]:
######################
### COMMMON PARAMS####
######################


####  activation function ####
activation_fn_type = 'relu'
activation_fn = lambda x: np.maximum(np.zeros(x.shape), x)

#### optimization parameters ####
optimization_params = {'step_size':1e-3, 
          'max_iteration':10000, 
          'random_restarts':1,
          'optimizer':'adam'}

#### model parameters #####
prior_variance = 1 # chosen in the paper. declared "reasonable". who decided this
y_noise_variance = 9 # needs to match what the dataset itself is

#### other ####
random = np.random.RandomState(0)

########################
#### LUNA SPECIFIC  ####
########################

luna_architecture = {'width': 50,
            'hidden_layers': 2,
            'input_dim': 1,
            'output_dim': 50, #number of auxiliary functions, # note, in NLM, below, we change to 1
            'activation_fn_type': activation_fn_type,
            'activation_fn_params': 'rate=1',
            'activation_fn': activation_fn}

regularization_param_luna = 1e-1 # in the paper they searched over 1e-3,...,1e3 and chose 1e-1 for regularization

similarity_param = 1e0 # in the paper they searched over 1e-3,...,1e3 and chose 1e0 for similarity

########################
#### NLM SPECIFIC  ####
########################

nlm_architecture = luna_architecture
nlm_architecture["output_dim"] = 1
regularization_param_nlm = 8.37 #they chose this in the paper, what a beautifully specific number



### Train NLM

In [4]:
t0 = time.time()
nlm = NLM(prior_variance,y_noise_variance, regularization_param_nlm, nlm_architecture, random)
nlm.train(x_train,y_train, optimization_params)
print(f"time: {np.round(time.time() - t0, 3)} seconds")

 Iteration 9900 lower bound 67.1551033433542; gradient mag: 2.11841634249813373
Done Training
time: 1289.346 seconds


### Train LUNA

In [None]:
t0 = time.time()
luna = LUNA(prior_variance, y_noise_variance, regularization_param_luna, similarity_param, luna_architecture, random)
luna.train(x_train, y_train, optimization_params)
print(f"time: {np.round(time.time() - t0, 3)} seconds")

 Iteration 4900 lower bound -7544.035962786708; gradient mag: 71.2929460726677656

### Compare Results
- notice they have identical post processing function calls

In [None]:
# fig, ax = plt.subplots(1,2, figsize = (12, 6))
i = 0
for name, model in {"Luna":luna,"NLM":nlm}.items():
    posterior_predictions, posterior_predictive_samples = model.predict(x_test)
    bh.viz_pp_samples(x_train, y_train,x_test.flatten(),posterior_predictive_samples,name)

other notes: 
 - trained on 100 points
 