In [None]:
# import standard libraries
!pip install autograd
from autograd import numpy as np
from autograd import grad
from autograd.misc.optimizers import adam, sgd
from autograd import scipy as sp
import autograd.numpy.random as npr
import pandas as pd
import numpy
import matplotlib.pyplot as plt
import sys

# import our libraries
import bayes_helpers as bh
from utils import generate_data
from utils import run_toy_nn
from feed_forward import Feedforward
from nlm import NLM

In [None]:
from nlm import NLM

class LUNA(NLM):
    """
    Fits LUNA Model; inherits from NLM and overrides the objective function.

    Model Assumptions
     - Weights distributed normally
     - Ys distributed normally

    How to use:
      - run train() to create:
            a) the NN MLE weights, found in self.ff.weights
            b) self.posterior samples, the distribution for the weights in the last layer of NLM

      - run predict() to get distribution of ys, given x test

    The training objective is a loss (to be minimised):

        mean-over-aux-functions MSE  +  gamma * ||W||^2  +  lambda * sum_{i<j} cos_sim^2(grad f_i, grad f_j)
    """

    def __init__(self, architecture, grad_finite_diff=None):
        """
        Parameters
        ----------
        architecture : dict
            Network description; this class reads 'output_dim' (number of auxiliary
            functions) and 'width' (units per hidden layer). The dict is also handed
            to NLM unchanged.
        grad_finite_diff : callable, optional
            Replacement for default_grad_finite_diff, called as grad_finite_diff(f, x).
        """
        prior_var = 1.
        y_noise_var = 2.
        random = np.random.RandomState(0)
        # inherit from NLM, override objective func
        super().__init__(prior_var, y_noise_var, architecture, random, self.make_objective)
        self.D, self.D_out, self.H = self.ff.D, architecture['output_dim'], architecture['width']

        # override default finite difference method for cosine similarity calc (see cos_sim_sq function)
        if grad_finite_diff:
            self.grad_finite_diff = grad_finite_diff
        else:
            self.grad_finite_diff = self.default_grad_finite_diff

    def get_aux_funcs(self, W):
        """
        Slice the output-layer parameters of every auxiliary function out of W.

        Returns
        -------
        res_w : list
            One length-H weight slice of W[0] per auxiliary function.
        res_b : list
            One bias entry of W[0] per auxiliary function.
        """
        res_w, res_b = [], []
        D, D_out, H = self.D, self.D_out, self.H

        # The output layer is taken to be the trailing D_out*H + D_out entries of W[0].
        # (The previous start index was one lower, so it read one entry belonging to the
        # preceding layer and never read the final entry W[0][D-1].)
        start = D - (D_out*H + D_out)

        # NOTE(review): this assumes each aux function is stored as H weights immediately
        # followed by its bias ([w_0, b_0, w_1, b_1, ...]). Confirm against how
        # Feedforward.forward unpacks the output layer.
        stride = H + 1
        for m in range(D_out):
            lo = start + m*stride
            res_w.append(W[0][lo:lo + H])
            res_b.append(W[0][lo + H])
        return res_w, res_b

    def default_grad_finite_diff(self, f, x):
        """
        Forward finite-difference approximation (f(x + dx) - f(x)) / dx.

        f is a vectorized function, x is a vector. A single scalar step dx is drawn
        from a normal distribution with mean 0 and standard deviation 0.1 on every
        call (see LUNA paper, appendix B.1), so the result is stochastic and dx may
        be negative or very close to zero.
        """
        dx = np.random.normal(0, 0.1)
        return (f(x + dx) - f(x))/dx

    def similarity_score(self, W, x_train=None):
        """
        Sum of squared cosine similarities over all pairs of auxiliary functions.

        Parameters
        ----------
        W : array
            Full network weight vector, as accepted by self.ff.forward.
        x_train : array, optional
            Inputs on which to evaluate the score. Defaults to the inputs most
            recently passed to make_objective.

        Raises
        ------
        ValueError
            If no inputs are given and make_objective has not been called yet.
        """
        if x_train is None:
            x_train = getattr(self, '_x_train', None)
        if x_train is None:
            raise ValueError("similarity_score needs x_train: pass it explicitly or call train() first")

        D_out = self.D_out

        def cos_sim_sq(fi, fj, x):
            # returns 1 when fi parallel to fj
            # returns 0 when fi perpendicular to fj
            grad_i = self.grad_finite_diff(fi, x)
            grad_j = self.grad_finite_diff(fj, x)
            numerator = np.dot(grad_i, grad_j.T)**2
            denominator = np.dot(grad_i, grad_i.T) * np.dot(grad_j, grad_j.T)
            return numerator/denominator

        def make_aux(w, b):
            # Bind w and b at creation time so each closure keeps its own parameters.
            return lambda x: np.matmul(w, x) + b

        # Input to the aux functions (presumably the last hidden layer's activations,
        # going by the final_layer_out flag - see Feedforward.forward).
        final_hidden_layer = self.ff.forward(W, x_train, final_layer_out=True)
        aux_func_weights, aux_func_biases = self.get_aux_funcs(W)
        aux_funcs = [make_aux(w, b) for w, b in zip(aux_func_weights, aux_func_biases)]

        # calculate square of cosine similarity for each pair of aux functions
        score = 0
        for i in range(D_out):
            for j in range(i + 1, D_out):
                score += cos_sim_sq(aux_funcs[i], aux_funcs[j], final_hidden_layer)

        return score

    def mean_mean_sq_error(self, W, x_train=None, y_train=None):
        """
        Mean squared error averaged over all auxiliary regressors and all data points.

        Parameters
        ----------
        W : array
            Full network weight vector, as accepted by self.ff.forward.
        x_train, y_train : array, optional
            Data to score against; y_train is broadcast against every aux output.
            Default to the data most recently passed to make_objective.

        Raises
        ------
        ValueError
            If data is not given and make_objective has not been called yet.
        """
        if x_train is None:
            x_train = getattr(self, '_x_train', None)
        if y_train is None:
            y_train = getattr(self, '_y_train', None)
        if x_train is None or y_train is None:
            raise ValueError("mean_mean_sq_error needs x_train and y_train: pass them explicitly or call train() first")

        aux_outputs = self.ff.forward(W, x_train)  # original author's note: shape = (1,10,12)
        targets = np.reshape(y_train, (1, 1, -1))  # broadcasts across the aux-function axis

        # Plain mean of squared errors over every (aux function, data point) pair.
        # The earlier norm(axis=1)**2 form summed over aux functions before averaging,
        # which is D_out times larger than the documented "mean of the MSE", and the
        # gradient of norm is undefined where the residual is exactly zero.
        return np.mean((targets - aux_outputs)**2)

    # for LUNA, this needs to use aux functions
    def make_objective(self, x_train, y_train, reg_param):
        """
        Build the LUNA training loss and its gradient for the given data.

        Parameters
        ----------
        x_train, y_train : array
            Training data; captured by the returned closure and cached on the
            instance so similarity_score / mean_mean_sq_error can be called alone.
        reg_param :
            Accepted for interface compatibility but currently ignored; the L2
            strength is fixed at 0.1 below (eventually this will become an input).

        Returns
        -------
        (objective, gradient) where objective(W, t) is the loss and gradient is
        autograd's grad(objective).
        """
        # Remember the data so the helper methods no longer depend on notebook globals.
        self._x_train, self._y_train = x_train, y_train

        gamma = 0.1    # L2 penalty strength - eventually this will become an input
        lambda_ = 0.1  # similarity penalty strength - eventually this will become an input

        def objective(W, t):
            # Loss to minimise: fit error plus both penalties.
            # The terms were previously subtracted from the MSE, which rewards large
            # weights and similar aux functions when the loss is minimised.
            # NOTE(review): assumes the optimiser driven by NLM.train minimises this
            # value (autograd's adam/sgd take descent steps) - confirm.
            L_sim = lambda_*self.similarity_score(W, x_train)

            # sum of squares == ||W||^2, with a well-defined gradient at W == 0
            regularization_penalty = gamma*np.sum(W**2)
            mean_mse = self.mean_mean_sq_error(W, x_train, y_train)
            L_fit = mean_mse + regularization_penalty

            return L_fit + L_sim

        return objective, grad(objective)

### Define Hyperparameters

In [None]:
# ReLU activation: element-wise max(0, x)
activation_fn_type = 'relu'
activation_fn = lambda x: np.maximum(np.zeros(x.shape), x)

# Neural network design choices
width, hidden_layers, input_dim = 7, 2, 1
output_dim = 10  # number of auxiliary functions

# Architecture description consumed by LUNA
architecture = dict(
    width=width,
    hidden_layers=hidden_layers,
    input_dim=input_dim,
    output_dim=output_dim,
    activation_fn_type=activation_fn_type,
    activation_fn_params='rate=1',
    activation_fn=activation_fn,
)

luna = LUNA(architecture)

In [None]:
# Read the x / y columns and shape each as a (1, N) row vector
df = pd.read_csv("HW8_data.csv")
x_train = np.array(df["x"]).reshape((1, -1))
y_train = np.array(df["y"]).reshape((1, -1))

# 200 evenly spaced test inputs, extending one unit past the observed x-range on each side
x_test = np.linspace(x_train.min() - 1, x_train.max() + 1, 200).reshape((1, -1))

# Optimisation settings handed to luna.train
params = dict(
    step_size=1e-3,
    max_iteration=100,
    random_restarts=1,
    optimizer='adam',
)

luna.train(x_train, y_train, params)

In [None]:
# Predict at the test inputs, then plot the posterior predictive samples with the training data
(posterior_predictions,
 posterior_predictive_samples) = luna.predict(x_test)

bh.viz_pp_samples(
    x_train,
    y_train,
    x_test.flatten(),
    posterior_predictive_samples,
    'La Luna!',
)
