In [None]:
!pip install corner nestle

In [None]:
# import all packages
import corner,nestle
import pandas
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Load the example dataset prepared for this lesson. Despite the generic
# ".dat" extension it is just a plain-text file.
data_filename='https://raw.githubusercontent.com/uofscphysics/STEM_Python_Course/Summer2020/02_Week2/Data/1D_intro_examples.dat'
# The call below parses the file as comma-separated (sep=',') and takes the
# column names from its first line (header=0).
example_data_1D = pandas.read_csv(
    data_filename,
    header=0,
    sep=',',
)
# Print the first few rows as a quick sanity check.
print(example_data_1D.head())

In [None]:
# Draw the measurements read from file together with their error bars
# (see Day 2). 'x', 'y' and 'error' are the column names.
plt.errorbar(
    x=example_data_1D['x'],
    y=example_data_1D['y'],
    yerr=example_data_1D['error'],  # error bars drawn along the y-direction
    fmt='.',  # "format": mark each measurement with a point
)
plt.ylabel('y')  # label for the y-axis
plt.xlabel('x')  # label for the x-axis
plt.show()

In [None]:
# The data were generated from a simple quadratic: a*x^2 + b*x + c.
def my_model(x,a,b,c):
    """Evaluate the quadratic a*x**2 + b*x + c.

    x       -- value(s) at which to evaluate the model
    a, b, c -- quadratic, linear and constant coefficients
    """
    quadratic_term = a*x**2
    linear_term = b*x
    return quadratic_term + linear_term + c

#### An alternative to MCMC is Bayesian Nested Sampling. It is extremely similar but randomly samples the full parameter space instead of sending out individual walkers. MCMC is often better for a many-dimensional model.

#### Now create such a likelihood function. Remember that before we used a $\chi^2$ function to estimate best-fit parameters. In that case, we wanted to minimize the $\chi^2$ statistic to find best-fit parameters. Now we want a likelihood function to maximize instead of minimize, but the principle is the same (calculating how well a given choice of parameters reproduces the observed data given your model). Therefore, you can just use the opposite (negative) of your $\chi^2$ function as your likelihood function. For now, just leave your function as-is (we can add the negative in later).

In [None]:

def chisq_likelihood(theta, args):
    """Return the chi-square statistic for one choice of model parameters.

    theta -- sequence of the three model parameters (a, b, c)
    args  -- sequence (x, y, yerr): the x values, the observed y values
             and the errors on y

    The statistic is the sum over all points of
    (observed - model)**2 / error**2, with the model given by my_model.
    """
    x, y, yerr = args  # same as x=args[0], y=args[1], yerr=args[2]
    a, b, c = theta    # unpacked the same way as args
    residuals = y - my_model(x, a, b, c)
    # Each squared residual is weighted by the inverse-square error.
    weights = 1./yerr**2
    return np.sum(residuals**2*weights)




#### Nestle requires a function to maximize with only 1 argument. Create a `loglike` function that accepts one argument (the array of parameter guesses), then returns the $\chi^2$ result using your $\chi^2$-likelihood function.

In [None]:
# The fitting package needs a likelihood function that accepts only the
# parameters, so this small wrapper hands the data columns to our
# chi-square function (chisq_likelihood) on its behalf.
def loglike(theta):
    """Return -0.5 times the chi-square of the parameters theta for the example data."""
    observations = tuple(
        example_data_1D[column] for column in ('x', 'y', 'error')
    )
    # Negated because we need a function to maximize rather than minimize;
    # the 0.5 comes from the definition of a Gaussian distribution.
    return -0.5*chisq_likelihood(theta, observations)

#### Now we want to apply bounds as we did for an MCMC. This can be a bit difficult to understand. We are using the same bounds on our parameters as before: `[a=(-20,20),b=(-20,20),c=(-20,20)]`. However, nested sampling always reduces the problem to the range (0,1) for every parameter. We therefore always need to provide a function, `prior_transform`, which applies our prior to the model and simultaneously transforms from this unit space to our actual parameter space. This essentially means rescaling each parameter from the range (0,1) to the range (-20,20).

#### For example, we want a uniform prior on b, meaning bounds from -20 to 20. The algorithm will only try values for b from 0 to 1. Suppose it tries b=0.5. That's halfway between 0 and 1, which means it is halfway between our bounds (-20,20), which is 0, not 0.5. So we need to find a way to change 0.5$\rightarrow$0. Take a second point as well (for example, a guess of b=1 must map to 20, i.e. 1$\rightarrow$20); these two conditions form a system of equations that you can solve for the scale and offset of the transformation (here: multiply by 40, then subtract 20). I'll fill this function in for you, then see if you can work out how to write your own for another scenario. 

In [None]:
def prior_transform(parameters):
    """Rescale three unit-interval values onto the range (-20, 20).

    For each of the three parameters, 0 maps to -20, 0.5 maps to 0 and
    1 maps to 20.
    """
    width = np.full(3, 40)    # upper bound minus lower bound, per parameter
    lower = np.full(3, -20)   # lower bound, per parameter
    return width * parameters + lower


#### Now all we need is to run the nested sampler. We give it our likelihood function, our prior transformation function, the number of dimensions (model parameters), and the number of "points". The number of points is analogous to the walker/sample number for MCMC (i.e. increasing it should result in a slower but more accurate fit). 

In [None]:
# Run the nested sampler on our likelihood and prior transform:
# three model parameters (ndim=3) and 500 points (npoints=500).
result_nest = nestle.sample(
    loglike,
    prior_transform,
    npoints=500,
    ndim=3,
)

#### Okay let's see what we got! 

In [None]:
# What did the nested sampling actually give us? The corner package
# visualises its output. The dashed line marks the 0.5 quantile (the median)
# of the samples, which is one way of estimating the best-fit parameters.
fig = corner.corner(
    result_nest.samples,           # the samples produced by the run above
    labels=["$a$", "$b$","$c$"],   # parameter labels
    weights=result_nest.weights,
    quantiles=[.5],
    plot_datapoints=True,
    plot_density=False,
    plot_contours=False,
)

plt.show()

In [None]:
# But how close did we get to the true values?
# We take the median (50th percentile) of each parameter's distribution as
# our result. The uncertainties are the differences between the 84th and 50th
# percentiles (upper) and between the 50th and 16th percentiles (lower).
#
# Nested-sampling samples are not equally probable: each one carries a weight
# (result_nest.weights), which is why the corner plot is given those weights.
# The percentiles must be weighted in the same way; a plain np.percentile
# over result_nest.samples would ignore the weights and would not match the
# median drawn on the corner plot.
quantile_levels = [0.16, 0.50, 0.84]

# One weighted (16%, 50%, 84%) triple per parameter column of the samples.
param_quantiles = [
    corner.quantile(result_nest.samples[:, column],
                    quantile_levels,
                    weights=result_nest.weights)
    for column in range(result_nest.samples.shape[1])
]
(a_nest_lower, a_nest, a_nest_upper), \
    (b_nest_lower, b_nest, b_nest_upper), \
    (c_nest_lower, c_nest, c_nest_upper) = param_quantiles

print('a:',a_nest,'±(',a_nest_upper-a_nest,a_nest-a_nest_lower,')')
print('b:',b_nest,'±(',b_nest_upper-b_nest,b_nest-b_nest_lower,')')
print('c:',c_nest,'±(',c_nest_upper-c_nest,c_nest-c_nest_lower,')')


#### Plot the result. Show the data (with error bars), with your best-fit line going through. How does it look? You could compare it to your $\chi^2$ result if you want.

In [None]:
# Overlay the fitted model on the measurements.
plt.errorbar(
    x=example_data_1D['x'],
    y=example_data_1D['y'],
    yerr=example_data_1D['error'],
    label='Data',
    fmt='.',
)

# Evaluate the model at the data's x values using the fitted parameters.
plt.plot(
    example_data_1D['x'],
    my_model(example_data_1D['x'],a_nest,b_nest,c_nest),
    'r--',  # red, dashed line
    label='Nestle Fit',
)

plt.ylabel('y')
plt.xlabel('x')
plt.legend()

plt.show()

#### Not bad! Play with the `npoints`  to see what it does to computation time and accuracy. What extra information do we get from this approach, vs. the simple $\chi$-square minimization?