# Bayesian Inference - Basics
Description of bayesian inference. Equation. etc.

...

In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib notebook
from scipy import interpolate, integrate, stats
import ipywidgets as widgets

In [2]:
import matplotlib
matplotlib.rcParams['text.usetex'] = True
plt.rcParams.update({
    "text.usetex": True,
    "font.family": "serif",
    "font.serif": ["Palatino"],
})

## Data
The data consists of 5 points that closely follow a linear trend, with zero y-intercept and slope 3. Each datapoint is treated as a random variable, and the points shown here corresponds to the mean. The distribution of each datapoint will be described in the 'Likelihood' section.

In [3]:
gamma = 0.05

In [4]:
Nd = 10
max_x = 5.
slope = 3.
slope2 = 8.
noise_factor = 0.2
##xd = np.random.random(Nd)*max_x
#xd = np.linspace(0.5, max_x, Nd)
#yd1 = slope*xd  
#yd2 = np.sqrt(xd) *slope2
beta = 0.5
#yd = beta * yd1  + (1-beta) * yd2
#noise_ratio = 2 * noise_factor*(np.random.random(Nd) - 0.5)
#yd += noise_ratio * yd

xd = np.array([ 0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5.])
yd = np.array([ 2.96154494,  5.41365771,  7.96523347,  9.42709451, 10.09198426,
       12.99207291, 13.57041916, 14.4357646 , 17.81360468, 14.93374659])


In [5]:
yd

array([ 2.96154494,  5.41365771,  7.96523347,  9.42709451, 10.09198426,
       12.99207291, 13.57041916, 14.4357646 , 17.81360468, 14.93374659])

In [6]:
#xd = np.array([6.04661916, 8.26642889, 7.33459925, 9.17361318, 0.17396911])
#yd = np.array([12.87410392, 16.9335356 , 15.03438594, 18.30444499,  0.78733539])

In [20]:
fig = plt.figure(figsize=(4.5,3))
xm = np.linspace(0, max_x, 100)
#ym = beta * slope*xm + (1-beta) * np.exp(gamma*slope*xm) 
ym1 = beta * slope*xm 
#ym2 = (1-beta) * np.sqrt(gamma*slope*xm) /gamma
ym2 = (1-beta) * np.sqrt(xm) *slope2
ym = ym1 + ym2
plt.plot(xd, yd, 'C0o',ms=5)
plt.plot(xm, ym, 'k--', label='total',linewidth=1.8)
plt.plot(xm, ym1, 'r-.', label='linear',linewidth=1.8)
plt.plot(xm, ym2, 'b-.', label='square root',linewidth=1.8)
plt.xlabel(r'$x$',fontsize=13)
plt.ylabel(r'$y$',fontsize=13)
plt.xticks(fontsize=12)
plt.yticks([0,5,10,15,20],fontsize=12)
plt.xlim(0,5.2)
plt.ylim(0,20)
plt.legend(fontsize=11.5)
plt.savefig('../figs-new/data.pdf',bbox_inches='tight')
plt.show()

<IPython.core.display.Javascript object>

In [21]:
noise_ratio

NameError: name 'noise_ratio' is not defined

In [22]:
yd

array([ 2.96154494,  5.41365771,  7.96523347,  9.42709451, 10.09198426,
       12.99207291, 13.57041916, 14.4357646 , 17.81360468, 14.93374659])

In [23]:
xd

array([0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5. ])

## Model
We model this relationship with a one parameter model. The goal will be to determine the value of that parameter.
The model is a linear model with zero intercept, and the parameter is the slope of the line.

In [24]:
def model(w, x, mtag):
    w = np.atleast_1d(w)
    x = np.atleast_1d(x)
    # XH: added switching of models; 1 = linear; 2 = exponential
    if mtag == 1 :
        y = w[:,None]*x[None,:]
    else:
        #y = np.exp(gamma*w[:,None]*x[None,:])
        #y = np.sqrt(gamma*w[:,None]*x[None,:]) / gamma
        y = w[:,None]*np.sqrt(x[None,:])
    return np.squeeze(y)

## Likelihood - Interactive
The likelihood of the model given the new data is equal to the probability that the data came from the model.
We will model the data as random variables with gaussian distribution, mean equal to the data value, and standard deviation equal to twenty percent of the mean. The likelihood of the model given a single data point is the probability that the data has the value given by the model for the x-location of the datapoint. The likelihood for a collection of data point is given as the product of the likelihood of each point. This value is normalized by the integral of the likelihood of all  possible models to get a probability distribution with integral equal to one. This result is the likelihood of each parameter given that the model has the form chosen.

In this interactive widget each data point distribution is plotted, as is the model. The value of the parameter can be adjusted. The probability that each data point came from the given model is also displayed. As the value of the parameter is adjusted, the corresponding point in the likelihood plot is also shown.

In [25]:
w_min = 0.
w_max = 10.
w2_min = w_min
w2_max = w_max
Nw = 1001
error = 0.2
Npdfpts = 21

In [26]:
def likelihood(w, xd=xd, yd=yd, error=0.2, mtag=1):
    mean = yd
    stddev = error * mean
    ymd = model(w, xd, mtag)
    lld = 1./np.sqrt(2.*np.pi*stddev[None,:]**2) * \
        np.exp(-1.*((ymd-mean[None,:])**2)/(2.*stddev[None,:]**2))
    return np.prod(lld, axis=1)

In [27]:
W = np.linspace(w_min, w_max, Nw)
mtag = 1
llhd_i = likelihood(W, xd, yd, error, mtag)
llhd = interpolate.interp1d(W, llhd_i, kind='linear')
llhd_int = integrate.quad(llhd, w_min, w_max)[0]

In [28]:
plt.figure()
plt.plot(W, llhd_i)

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x120035fd0>]

In [29]:
# XH: setup for the second model
W2 = np.linspace(w2_min, w2_max, Nw)
mtag = 2
llhd2_i = likelihood(W2, xd, yd, error, mtag)
llhd2 = interpolate.interp1d(W2, llhd2_i, kind='linear')
llhd2_int = integrate.quad(llhd2, w2_min, w2_max)[0]

In [30]:
plt.figure()
plt.plot(W2, llhd2_i)

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x11eaf0be0>]

# initialize plot
fig = plt.figure(figsize=(10, 5))
ax1 = fig.add_subplot(1, 2, 1)
plt.xlabel('x')
plt.ylabel('y')
ax2 = fig.add_subplot(1, 2, 2)
plt.xlabel('w')
plt.ylabel('Likelihood of w given data')

#plot data and data PDFs
mean = yd
stddev = error * mean
ppts = np.linspace(0,1,Npdfpts-1)
ppts = np.append(ppts,1.)
norms = []
for i in range(Nd):
    norm = stats.norm(loc=mean[i], scale=stddev[i])
    norms.append(norm)
    xp = [xd[i]]*2
    for j in range(Npdfpts-1):
        yp = [norm.ppf(ppts[j]), norm.ppf(ppts[j+1])]
        alpha = (0.5-np.abs(ppts[j]-0.5))/0.5
        ax1.plot(xp, yp, 'C0-', alpha=alpha)
ax1.plot(xd, yd, 'C0o')

# plot initial model
winit = w_min
ym = model(winit, xm)
line, = ax1.plot(xm, ym,'r-')

# Annotate data likelihood
annotation = []
ymd = model(winit, xd)
for i,xy in enumerate(zip(xd, yd)):
    ann = norms[i].pdf(ymd[i])*100.
    annotation.append(ax1.annotate(' {:3.0f}%'.format(ann), xy=xy, textcoords='data'))

# Plot Likelihood    
ax2.plot(W, llhd_i/llhd_int, 'C0-')
point, = ax2.plot(winit, likelihood(winit), 'C0o')
    
ax1.set_xlim(left=0)
ax1.set_ylim(ymin=0)
ax2.set_xlim(left=0)
ax2.set_ylim(ymin=0)

# Widget 
def update(w = winit):
    ym = model(w, xm)
    line.set_ydata(ym)
    point.set_xdata(w)
    point.set_ydata(likelihood(w)/llhd_int)
    ymd = model(w, xd)
    for i in range(Nd):
        ann = norms[i].pdf(ymd[i])*100.
        annotation[i].set_text('{:3.0f}%'.format(ann))
    fig.canvas.draw()
widgets.interact(update, w=(w_min,w_max,0.1));

## Prior and Posterior - Interactive
The prior is our current belief regarding the value of the parameter.
We consider several priors:

    0 - The slope is equally likely to have any value.
    
    1 - The slope is equally likely to have any value between 1 and 3, with any other values being impossible.
    
    2 - The slope is most likely to be 2, with probability of other values given by a Gaussian distribution.
    
    3 - The slope is 2, with any other values being impossible. 

The posterior is the result of modifying our current belief (prior) based on the data. The posterior is the product of the likelihood (data) and the prior. The value at each w is normalized by the integral of this product to make the intregral of the posterior be 1. 


In this widget 4 different priors can be chosen. These examples show that:

    * when we have no belief (case 0) the posterior = the data. This reduces to the Maximum Likelihood Estimate.
    * when our beleif is absolute (case 2) the posterior = the prior regardless of the data
    * when we have a balance between strong beleif and flexibility the posterior depends on both the data and prior.

In [31]:
case_init = 0
y_lim = 0
y_inf = 10
Nc = 4

In [32]:
def prior(w, case, w_min=w_min, w_max=w_max):
    if case==0:
        return 1./(w_max-w_min)*(w>=w_min and w<=w_max)
    elif case==1:
        w_min = 0
        w_max = 10
        return 1./(w_max-w_min)*(w>w_min and w<=w_max)
    elif case==2:
        return stats.norm(loc=2., scale=0.5).pdf(w)
    elif case==3:
        return (w==2.)*1.

In [33]:
prior_i = np.array([prior(W[i], case_init) for i in range(Nw)])

In [34]:
dw = W[1]-W[0]
post = np.ones([Nc, Nw])
mld = np.zeros(Nc)
for ic in range(Nc):
    mld[ic] = 0
    for i,w in enumerate(W):
        post[ic,i] = llhd_i[i]*prior(w, ic)
        mld[ic] += post[ic,i]*dw
    post[ic,:] /= mld[ic]
post[post>y_inf] = 1.

In [35]:
plt.figure()
plt.plot(W, post[0, :])

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x1235c9c10>]

In [36]:
#XH: compute posterior for model 2
dw2 = W2[1]-W2[0]
post2 = np.ones([Nc, Nw])
mld2 = np.zeros(Nc)
for ic in range(Nc):
    mld2[ic] = 0
    for i,w in enumerate(W2):
        post2[ic,i] = llhd2_i[i]*prior(w, ic)
        mld2[ic] += post2[ic,i]*dw2
    post2[ic,:] /= mld2[ic]
post2[post>y_inf] = 1.

In [90]:
fig = plt.figure(figsize=(4.5,3))
plt.plot(W, post[0,:], 'r-', label=r'$w_1$', linewidth=1.8)
plt.plot(W2, post2[0,:], 'b-', label=r'$w_2$', linewidth=1.8)
plt.legend(fontsize=13)
plt.xlabel(r'$w$',fontsize=13)
plt.ylabel('PDF',fontsize=13)
plt.xlim(-0.5,10.5)
plt.ylim(-0.05,1.6)
plt.xticks(fontsize=13)
plt.yticks([0,0.3,0.6,0.9,1.2,1.5],fontsize=13)
fig.savefig('../figs-new/pw1pw2.pdf',bbox_inches='tight')
plt.show()

<IPython.core.display.Javascript object>

In [49]:
# normlize mld to find probability for each model
#P1 = mld[case_init]/(mld[case_init] + mld2[case_init])
#P2 = mld2[case_init]/(mld[case_init] + mld2[case_init])
P1 = mld[case_init]*beta/(mld[case_init]*beta + mld2[case_init]*(1-beta))
P2 = mld2[case_init]*(1-beta)/(mld[case_init]*beta + mld2[case_init]*(1-beta))
P1, P2

(0.4481791109958257, 0.5518208890041744)

In [50]:
mld / (mld + mld2), mld2 / (mld + mld2)

(array([0.44817911, 0.44817911, 0.99999931, 1.        ]),
 array([5.51820889e-01, 5.51820889e-01, 6.86620250e-07, 6.21902899e-11]))

In [85]:
xq=3
yq = np.linspace(xq*w_min, np.sqrt(xq)*w_max, Nw)
pw1interp = interpolate.interp1d(W, post[0, :], kind='linear')
pw2interp = interpolate.interp1d(W2, post2[0, :], kind='linear')
pyq1=pw1interp(yq/xq)/xq
pyq2=pw2interp(yq/np.sqrt(xq))/np.sqrt(xq)
pyq=pyq1*P1+pyq2*P2              

In [86]:
yt = beta * slope*xq + (1-beta) * np.sqrt(xq) *slope2
ym1=(np.sum(yq*pyq1)-yq[0]*pyq1[0]/2-yq[Nw-1]*pyq1[Nw-1]/2)*(yq[2]-yq[1])
ym2=(np.sum(yq*pyq2)-yq[0]*pyq2[0]/2-yq[Nw-1]*pyq2[Nw-1]/2)*(yq[2]-yq[1])
ym=(np.sum(yq*pyq)-yq[0]*pyq[0]/2-yq[Nw-1]*pyq[Nw-1]/2)*(yq[2]-yq[1])


In [87]:
(np.sum(pyq)-pyq[0]/2-pyq[Nw-1]/2)*(yq[2]-yq[1])

1.0000000026837637

In [88]:
yt,ym1,ym2,ym

(11.428203230275509, 12.24490441837343, 10.638028596886462, 11.35819677404118)

In [89]:
fig = plt.figure(figsize=(4.5,3))
plt.plot(yq, pyq1, 'r-', label=r'$y_1$', linewidth=1.8)
plt.plot(yq, pyq2, 'b-', label=r'$y_2$', linewidth=1.8)
plt.plot(yq, pyq, 'm-', label=r'$y_a$', linewidth=1.8)
plt.plot(np.array([yt, yt]),np.array([0, 0.8]),'g-.', label=r'$y_t$', linewidth=1.8)
plt.xlabel(r'$y$',fontsize=13)
plt.ylabel('PDF',fontsize=13)
plt.legend(fontsize=12)
plt.xticks(fontsize=13)
plt.yticks(fontsize=13)
fig.savefig('../figs-new/xq={}.pdf'.format(xq),bbox_inches='tight')
plt.show()
# plt.text(0,0.75,"$x_q=4$")
# plt.text(0,0.7,"$y_t=14.0$")
# plt.text(0,0.65,"E$[y_1]=16.3216$")
# plt.text(0,0.6,"E$[y_2]=12.2837$")
# plt.text(0,0.55,"E$[y_a]=14.0934$")

<IPython.core.display.Javascript object>

In [56]:
fig = plt.figure(figsize=(10,5))
ax1 = fig.add_subplot(1, 2, 1)
plt.xlabel('w')
plt.ylabel('Prior PDF')
ax1.set_xlim([w_min, w_max])
ax1.set_ylim([0.,y_lim])
ax2 = fig.add_subplot(1, 2, 2)
plt.xlabel('w')
plt.ylabel('Prior PDF')
ax2.set_xlim([w_min, w_max])
ax2.set_ylim([0.,y_lim])

line1, = ax1.plot(W, prior_i, 'C1-', label='prior (belief)')
ax1.plot(W, llhd_i/llhd_int, 'C0-', label='likelyhood (data)', alpha=0.25)
ax1.legend()
line2, = ax2.plot(W, prior_i, 'C1-', label='prior (belief)', alpha=0.25)
ax2.plot(W, llhd_i/llhd_int, 'C0-', label='likelyhood (data)', alpha=0.25)
line3, = ax2.plot(W, post[case_init, :], 'C2-', label='posterior')
ax2.legend()

# Widget
def update(case = case_init):
    prior_i = np.array([prior(W[i], case) for i in range(Nw)])
    line1.set_ydata(prior_i)
    line2.set_ydata(prior_i)
    line3.set_ydata(post[case, :])
    fig.canvas.draw()
widgets.interact(update, case={0:0,1:1,2:2,3:3});

<IPython.core.display.Javascript object>

  ax1.set_ylim([0.,y_lim])
  ax2.set_ylim([0.,y_lim])


interactive(children=(Dropdown(description='case', options={0: 0, 1: 1, 2: 2, 3: 3}, value=0), Output()), _dom…