Much of the code here is based on the [tutorial on the edward homepage](http://edwardlib.org/getting-started).

In [115]:
%matplotlib inline
#from __future__ import absolute_import
#from __future__ import division
#from __future__ import print_function

import edward as ed
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np

import statsmodels.api as sm
import pandas as pd
plt.style.use('ggplot')

### Generate Toy Data

In [142]:
#super basic example: linear regression with homoskedastic errors
#Y~N(mu+Xb,sigma2)
# X is an 50x9 matrix where each column is gaussian(5,sd=3)
N = 50
Ntest = 40
Xtrain = np.array([np.random.normal(5,3,size=N) for i in range(9)]).T
Xtest = np.array([np.random.normal(5,3,size=Ntest) for i in range(9)]).T
#X is a 50x9 design matrix of continuous covariates
#print("colmeans: ", np.mean(X,0)) #column means, should be about 5
#print("col sds: ", np.std(X,0)) #column stdevs, should be about 3
beta0 = 33
beta = np.array([-3,-1.5,-.5,-.25,-0.01,0.01,.5,1,1.5])
#strong negative: 1,2
#weak negative: 3,4
#insignificant effects: 5,6
#weak positive: 7
#strong positive: 8,9
sigma = 4 #fairly high level of noise
yhat = Xtrain.dot(beta)+beta0
ytrain = yhat+np.random.normal(0,sigma,size=N)
ytest = Xtest.dot(beta)+beta0+np.random.normal(0,sigma,size=Ntest)
df = pd.DataFrame(np.hstack((ytrain.reshape(N,1),Xtrain)))
df.columns = ["y"]+["x"+str(i) for i in range(1,10)]
dftest = pd.DataFrame(np.hstack((ytest.reshape(Ntest,1),Xtest)))
dftest.columns = df.columns
df["intercept"]=1.0
dftest["intercept"] = 1.0
#df.head()
df.describe()

Unnamed: 0,y,x1,x2,x3,x4,x5,x6,x7,x8,x9,intercept
count,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0,50.0
mean,23.38407,4.836309,4.963879,4.989808,5.26038,5.55412,4.625539,5.121171,5.308879,5.288181,1.0
std,12.855284,3.224284,3.814252,2.615122,3.590815,2.780492,2.799095,3.398372,2.817629,2.750973,0.0
min,-7.299795,-1.874615,-5.542771,-0.452938,-1.958665,-2.984214,-0.980152,-1.655071,-0.194498,0.078327,1.0
25%,12.978215,2.818632,2.492646,3.092965,2.969495,3.978911,2.492299,3.061074,3.239619,3.218325,1.0
50%,28.32324,4.619567,5.187071,4.535591,4.58396,5.588893,4.293423,4.907327,5.338527,5.141689,1.0
75%,32.09733,6.607846,7.213141,6.704644,7.287659,7.279875,5.743156,7.503283,7.020034,7.181113,1.0
max,48.2121,12.485242,12.98145,11.099692,16.110177,13.013816,10.887334,12.356382,10.870269,11.280329,1.0


### Maximum Likelihood

First we will try the frequentist approach of maximum likelihood, provided in python by the **statsmodels** package

In [143]:
mod1 = sm.GLM(df["y"],df[df.columns[1:]],family=sm.families.Gaussian())
mod1 = mod1.fit()
mod1.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,50.0
Model:,GLM,Df Residuals:,40.0
Model Family:,Gaussian,Df Model:,9.0
Link Function:,identity,Scale:,15.8958780605
Method:,IRLS,Log-Likelihood:,-134.52
Date:,"Tue, 21 Mar 2017",Deviance:,635.84
Time:,13:30:41,Pearson chi2:,636.0
No. Iterations:,4,,

0,1,2,3,4,5
,coef,std err,z,P>|z|,[95.0% Conf. Int.]
x1,-2.9529,0.185,-16.000,0.000,-3.315 -2.591
x2,-1.6224,0.153,-10.631,0.000,-1.922 -1.323
x3,-0.8141,0.223,-3.644,0.000,-1.252 -0.376
x4,-0.3460,0.173,-2.002,0.045,-0.685 -0.007
x5,0.1192,0.213,0.559,0.576,-0.298 0.537
x6,-0.1766,0.225,-0.785,0.432,-0.617 0.264
x7,0.6477,0.185,3.505,0.000,0.286 1.010
x8,0.9531,0.210,4.549,0.000,0.542 1.364
x9,1.5750,0.230,6.843,0.000,1.124 2.026


In [144]:
ypred1 = mod1.predict(dftest[dftest.columns[1:]])
#root mean square prediction error
err1 = np.sqrt(np.mean((ypred1-ytest)**2))
err1 #OLS prediction error

4.3493895166547709

## Edward

### Maximum Likelihood

Edward supports maximum likelihood estimation (point estimates) of model parameters. However, you cannot actually inspect the parameter values. They are hidden inside the result object.

In [212]:
from edward.models import Normal

D = Xtest.shape[1]
X = tf.placeholder(tf.float32, [N, D]) #placeholder for data
#define likelihood and specify model parameters as variables (ie no priors)
b = tf.Variable(tf.zeros(D))
b0 = tf.Variable(0.0)
b0 = tf.Print(b0,[b0])
y = Normal(mu=ed.dot(X, b) + b0, sigma=sigma*tf.ones(N))
#use empty dict {} to force MLE instead of bayesian
mle = ed.Inference({}, {y:ytrain,X:Xtrain})
mle.run()

Iteration    1 [  0%]
Iteration  100 [ 10%]
Iteration  200 [ 20%]
Iteration  300 [ 30%]
Iteration  400 [ 40%]
Iteration  500 [ 50%]
Iteration  600 [ 60%]
Iteration  700 [ 70%]
Iteration  800 [ 80%]
Iteration  900 [ 90%]
Iteration 1000 [100%]


In [211]:
#sess = ed.get_session()
#form posterior predictive distribution
Xt = tf.placeholder(tf.float32, [Ntest,D])
yt = Normal(ed.dot(Xt,b)+b0,sigma=sigma*tf.ones(Ntest))
print("Mean squared error on test data:")
print(ed.evaluate('mean_squared_error', data={Xt: Xtest, yt: ytest}))

Mean squared error on test data:
549.902


### Maximum A Posteriori
Now we will try to solve the same problem using Edward. First attempt is MAP estimation, some code here is copied from [Edward tutorial](http://edwardlib.org/tutorials/supervised-regression)

In [198]:
help(ed.get_session)

Help on function get_session in module edward.util.graphs:

get_session()
    Get the globally defined TensorFlow session.
    
    If the session is not already defined, then the function will create
    a global session.
    
    Returns
    -------
    _ED_SESSION : tf.InteractiveSession



In [159]:
#define priors for model parameters
b = Normal(mu=tf.zeros(D), sigma=tf.ones(D))
b0 = Normal(mu=tf.zeros(1), sigma=tf.ones(1))
inference = ed.MAP([b0,b], {y:ytrain,X:Xtrain})
inference.run()

Iteration    1 [  0%]: Loss = 1236.559
Iteration  100 [ 10%]: Loss = 206.707
Iteration  200 [ 20%]: Loss = 194.715
Iteration  300 [ 30%]: Loss = 184.819
Iteration  400 [ 40%]: Loss = 176.409
Iteration  500 [ 50%]: Loss = 169.599
Iteration  600 [ 60%]: Loss = 164.221
Iteration  700 [ 70%]: Loss = 160.027
Iteration  800 [ 80%]: Loss = 156.774
Iteration  900 [ 90%]: Loss = 154.256
Iteration 1000 [100%]: Loss = 152.302


In [153]:
print(inference)

<edward.inferences.map.MAP object at 0x121b6fb38>


Iteration    1 [  0%]: Loss = 3020.843
Iteration  100 [ 10%]: Loss = 244.376
Iteration  200 [ 20%]: Loss = 220.132
Iteration  300 [ 30%]: Loss = 219.095
Iteration  400 [ 40%]: Loss = 218.850
Iteration  500 [ 50%]: Loss = 218.782
Iteration  600 [ 60%]: Loss = 218.765
Iteration  700 [ 70%]: Loss = 218.761
Iteration  800 [ 80%]: Loss = 218.760
Iteration  900 [ 90%]: Loss = 218.759
Iteration 1000 [100%]: Loss = 218.759


In [149]:
dir(inference)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'build_loss_and_gradients',
 'coord',
 'data',
 'debug',
 'finalize',
 'increment_t',
 'initialize',
 'latent_vars',
 'logging',
 'loss',
 'n_iter',
 'n_print',
 'print_progress',
 'run',
 'scale',
 't',
 'threads',
 'train',
 'update']

In [141]:
ytrain.shape

(50, 1)