In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from thermoextrap import *
import thermoextrap


In [11]:
# xtrapy stuff:
# note, just using single module right now.  Can retinker
import thermoextrap.xtrapy.core as core
import thermoextrap.xtrapy.xpan_beta as xpan_beta
import xarray as xr
from importlib import reload

In [12]:
order = 6
fs = [thermoextrap.symDerivAvgX(i) for i in range(order+1)]

In [13]:
# Synthetic test data shared by the comparisons below.
# number of samples
n = int(1e5)
# number of observable (x) values per sample
nv = 5

# constant shifts added to the uniform draws (both zero here)
_u_offset = 0.0
_x_offset = 0.0


# fix the global NumPy seed so the draws below are reproducible;
# the order of the four draws matters for the resulting values
np.random.seed(0)
u = np.random.rand(n) + _u_offset
x = np.random.rand(n, nv) + _x_offset

# second, independent set of draws used for testing
ub = np.random.rand(n) + _u_offset
xb = np.random.rand(n, nv) + _x_offset

# comparison of xtrapy

## Data averages

In [53]:
reload(core)
reload(xpan_beta)

<module 'thermoextrap.xtrapy.xpan_beta' from '/Users/wpk/Documents/python/projects/thermodynamic-extrapolation/thermoextrap/xtrapy/xpan_beta.py'>

In [15]:
# Test "coefs"
ufunc, xufunc = thermoextrap.buildAvgFuncs(x, u, order)
coefs_list = [fs[i](ufunc, xufunc) for i in range(order+1)]

In [54]:
# coefficient class handler
# raw moments

# coefficient builder
s = xpan_beta.factory_coefs(xalpha=False, central=False)
# data object
data = xpan_beta.factory_data(u, x, order=order, chunk=1000)

In [55]:
# Note very slight difference in averages.
# this is due to numerics
print(data.u - [ufunc(i) for i in range(order+1)])
print(data.xu - [xufunc(i) for i in range(order+1)])

<xarray.DataArray 'u' (moment: 7)>
array([0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       5.55111512e-17, 2.77555756e-17, 0.00000000e+00])
Dimensions without coordinates: moment
<xarray.DataArray 'x' (moment: 7, val: 5)>
array([[ 1.11022302e-15,  6.66133815e-16, -3.33066907e-16,
         6.77236045e-15, -4.77395901e-15],
       [-1.49880108e-15, -3.88578059e-16,  0.00000000e+00,
        -5.55111512e-17, -1.27675648e-15],
       [ 2.77555756e-17, -1.99840144e-15,  1.44328993e-15,
         1.05471187e-15,  2.49800181e-16],
       [ 4.71844785e-16,  9.43689571e-16,  1.44328993e-15,
         1.80411242e-16, -7.21644966e-16],
       [ 1.66533454e-15, -1.04083409e-15,  1.33226763e-15,
        -7.63278329e-16,  1.98452366e-15],
       [ 3.74700271e-16, -4.85722573e-16,  4.30211422e-16,
        -1.20736754e-15, -2.77555756e-17],
       [ 2.77555756e-16,  6.52256027e-16,  4.16333634e-17,
         2.91433544e-16,  8.32667268e-16]])
Dimensions without coordinates: moment, v

## New data models!

Two new data models are included.  These are based on the cmomy (central moment) Python package.
This package handles co-moment calculation, combination, resampling, etc.
There are two base classes: DataStatsCovVals and DataStatsCov.  The first is for
working with individual samples (like the factory_data method above).  The second is for
working with pre-averaged data (i.e., average co-moments coming from simulation).  This latter class has several
constructor methods to handle raw and central co-moments, and has xarray support to make things clear.


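Note that the ordering of axes is different for these new classes than for the `factory_data` objects above, so transpose to a common dimension order before making order-sensitive comparisons.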

In [56]:
# working with direct samples
# note that under the hood, numpy arrays are wrapped to xarray objects using the same method
# as above
xdata = xpan_beta.DataStatsCovVals(uv=u, xv=x, order=order, central=False)

In [57]:
print(np.abs(xdata.xu - data.xu).max())
print(np.abs(xdata.u  - data.u).max())

<xarray.DataArray 'x' ()>
array(5.21804822e-15)
<xarray.DataArray ()>
array(6.35602682e-15)


In [58]:
# instead, we can work directly with already averaged data.
# lets pretend we have averaged data

tmp_data = xpan_beta.factory_data(
    uv = xr.DataArray(u.reshape(100, n // 100), dims=['rec','ave']),
    xv = xr.DataArray(x.reshape(100, n // 100, 5), dims=['rec','ave', 'val']),
    order=order, central=False, rec='ave', rep='rec',
)



In [78]:
# note, don't have to pass w here, as it is uniform, but will for comparison purposes
xdata2 = xpan_beta.DataStatsCov.from_ave_raw(u=tmp_data.u, xu=tmp_data.xu, w=n//100)

In [83]:
xr.testing.assert_allclose(xdata2.reduce('rec').values, xdata.values)

In [73]:
# can resample along rec
out = xdata2.resample(nrep=20, axis='rec')

In [77]:
# note that xdata2 still has a 'rec' dimension;
# to compare with data, first reduce along 'rec'
xdata3 = xdata2.reduce('rec')

# difference between the reduced co-moments and the sample-based averages
xdata3.xu - data.xu

In [92]:
# can also directly create this object from unaveraged values
# note that 'dims' is the final dimension names for all dimensions except mom_x, and moment
xdata4 = xpan_beta.DataStatsCov.from_vals(uv=u, xv=x, axis=0, dims=['val'], order=order)

## coefficients of expansion

In [93]:
# norm = True, include 1/ n!  factor
# if want straight coeffs, use norm=False
c = s.xcoefs(data, norm=False) 
c

In [94]:
# test equal to old way
np.testing.assert_allclose(c, coefs_list, rtol=1e-5)

In [95]:
# instead, can use central moments
s_c = xpan_beta.factory_coefs(central=True)

In [96]:
data_c = xpan_beta.factory_data(u, x, order=order, central=True)

c_c = s_c.xcoefs(data_c, norm=False)

In [97]:
xdata_c = xpan_beta.DataStatsCovVals(uv=u, xv=x, order=order, central=True)

xc_c = s_c.xcoefs(xdata_c, norm=False)

In [98]:
# test close
np.testing.assert_allclose(s.xcoefs(data), s_c.xcoefs(data_c))
np.testing.assert_allclose(s.xcoefs(data), s_c.xcoefs(xdata_c))


# nice-ness of central moments

In [180]:
# coeffs using raw moments u[i], xu[i]
for i in range(order+1):
    print('{}: {}'.format(i, s.exprs[i]))

0: xu[0]
1: u[1]*xu[0] - xu[1]
2: 2*u[1]**2*xu[0] - 2*u[1]*xu[1] - u[2]*xu[0] + xu[2]
3: 6*u[1]**3*xu[0] - 6*u[1]**2*xu[1] - 6*u[1]*u[2]*xu[0] + 3*u[1]*xu[2] + 3*u[2]*xu[1] + u[3]*xu[0] - xu[3]
4: 24*u[1]**4*xu[0] - 24*u[1]**3*xu[1] - 36*u[1]**2*u[2]*xu[0] + 12*u[1]**2*xu[2] + 24*u[1]*u[2]*xu[1] + 8*u[1]*u[3]*xu[0] - 4*u[1]*xu[3] + 6*u[2]**2*xu[0] - 6*u[2]*xu[2] - 4*u[3]*xu[1] - u[4]*xu[0] + xu[4]
5: 120*u[1]**5*xu[0] - 120*u[1]**4*xu[1] - 240*u[1]**3*u[2]*xu[0] + 60*u[1]**3*xu[2] + 180*u[1]**2*u[2]*xu[1] + 60*u[1]**2*u[3]*xu[0] - 20*u[1]**2*xu[3] + 90*u[1]*u[2]**2*xu[0] - 60*u[1]*u[2]*xu[2] - 40*u[1]*u[3]*xu[1] - 10*u[1]*u[4]*xu[0] + 5*u[1]*xu[4] - 30*u[2]**2*xu[1] - 20*u[2]*u[3]*xu[0] + 10*u[2]*xu[3] + 10*u[3]*xu[2] + 5*u[4]*xu[1] + u[5]*xu[0] - xu[5]
6: 720*u[1]**6*xu[0] - 720*u[1]**5*xu[1] - 1800*u[1]**4*u[2]*xu[0] + 360*u[1]**4*xu[2] + 1440*u[1]**3*u[2]*xu[1] + 480*u[1]**3*u[3]*xu[0] - 120*u[1]**3*xu[3] + 1080*u[1]**2*u[2]**2*xu[0] - 540*u[1]**2*u[2]*xu[2] - 360*u[1]**2*u[3]*xu[1]

In [181]:
# coeffs using central moments du[i] = <(u-<u>)**i> 
for i in range(order+1):
    print('{}: {}'.format(i, s_c.exprs[i]))

0: x1
1: -dxdu[1]
2: dxdu[2]
3: 3*du[2]*dxdu[1] - dxdu[3]
4: -6*du[2]*dxdu[2] - 4*du[3]*dxdu[1] + dxdu[4]
5: -30*du[2]**2*dxdu[1] + 10*du[2]*dxdu[3] + 10*du[3]*dxdu[2] + 5*du[4]*dxdu[1] - dxdu[5]
6: 90*du[2]**2*dxdu[2] + 120*du[2]*du[3]*dxdu[1] - 15*du[2]*dxdu[4] - 20*du[3]*dxdu[3] - 15*du[4]*dxdu[2] - 6*du[5]*dxdu[1] + dxdu[6]


In [182]:
# so not only are central moments more stable, the resulting expressions
# are way simpler!

# taking advantage of xarray

In [104]:
# NOTE: have not implemented this for StatsCov objects yet
# TODO: implement dataset stuff for StatsCov objects...

In [105]:
# Make a dataset holding two observables ('a' from x, 'b' from xb):

x_set = xr.merge((core.xrwrap_xv(xx, name=name) 
                  for xx, name in zip([x, xb], ['a','b'])))
print(x_set)

# data objects built from the combined dataset (raw and central moments)
data_set = xpan_beta.factory_data(u, x_set, order)
data_set_c = xpan_beta.factory_data(u, x_set, order, central=True)


# need "other" x (xb) on its own for comparison against the dataset results
datab = xpan_beta.factory_data(u, xb, order)
xdatab = xpan_beta.DataStatsCovVals(uv=u, xv=xb, order=order, central=False)

# central-moment counterparts; the xdatab_c line was truncated to
# `xpan_beta.Data`, so it is completed to mirror xdatab above with central=True
datab_c = xpan_beta.factory_data(u, xb, order, central=True)
xdatab_c = xpan_beta.DataStatsCovVals(uv=u, xv=xb, order=order, central=True)

<xarray.Dataset>
Dimensions:  (rec: 100000, val: 5)
Dimensions without coordinates: rec, val
Data variables:
    a        (rec, val) float64 0.5353 0.904 0.5024 ... 0.7553 0.2138 0.7261
    b        (rec, val) float64 0.6237 0.1782 0.2007 ... 0.08856 0.0782 0.4235


In [106]:
# note that data_set has multiple measurements in it.  Very nice...
print(data_set.xv)
print(data_set.xu)

<xarray.Dataset>
Dimensions:  (rec: 100000, val: 5)
Dimensions without coordinates: rec, val
Data variables:
    a        (rec, val) float64 0.5353 0.904 0.5024 ... 0.7553 0.2138 0.7261
    b        (rec, val) float64 0.6237 0.1782 0.2007 ... 0.08856 0.0782 0.4235
<xarray.Dataset>
Dimensions:  (moment: 7, val: 5)
Dimensions without coordinates: moment, val
Data variables:
    a        (moment, val) float64 0.5004 0.499 0.5004 ... 0.07138 0.07166
    b        (moment, val) float64 0.5001 0.5015 0.5001 ... 0.07127 0.07149


In [107]:
ufuncb, xufuncb = thermoextrap.buildAvgFuncs(xb, u, order)
coefs_listb = [fs[i](ufuncb, xufuncb) for i in range(order+1)]

In [108]:
# make sure have same value
np.testing.assert_allclose(s.xcoefs(datab,  norm=False), coefs_listb)

In [109]:
# consider dataset
c_set = s.xcoefs(data_set, norm=False)
c_set

In [110]:
# testing
cb = s.xcoefs(datab, norm=False)
np.testing.assert_allclose(c_set.a, c)
np.testing.assert_allclose(c_set.b, cb)

In [111]:
# central
c_set_c = s_c.xcoefs(data_set_c, norm=False)
# test
xr.testing.assert_allclose(c_set, c_set_c)

# resampling

In [112]:
# explicit resample index

In [113]:
idx = core.resample_indicies(size=len(data), nrep=10)

out = data.resample(indices=idx)

xout = xdata.resample(indices=idx)

xr.testing.assert_allclose(xout.xu.transpose(*out.xu.dims),out.xu)

In [114]:
# testing resampling:
# use same resampler for all
idx = core.resample_indicies(len(data), nrep=10)

In [115]:
c_r = s.xcoefs(data.resample(indices=idx))
cb_r = s.xcoefs(datab.resample(indices=idx))
cs_r = s.xcoefs(data_set.resample(indices=idx))

In [116]:
np.testing.assert_allclose(cs_r.a, c_r)
np.testing.assert_allclose(cs_r.b, cb_r)

In [117]:
# center
cs_c_r = s_c.xcoefs(data_set_c.resample(indices=idx))

In [118]:
xr.testing.assert_allclose(cs_r, cs_c_r)

# resampling time and chunking

In [119]:
# xarray can use dask arrays as a backend.  I'm no expert, but this may speed things up
# use chunking to create a dask array of uv, xv

In [212]:
idx = core.resample_indicies(len(data), nrep=200)

In [213]:
%%timeit -n 1 -r 1
r = data.resample(indices=idx, chunk=None)
r.u
r.xu

3.33 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [214]:
%%timeit -n 1 -r 1
r = data.resample(indices=idx, chunk=10000)
r.u
r.xu

3.26 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [215]:
%%timeit -n 1 -r 1
# note that xdata does not use chunking,
# but for reasonably sized data it is faster.
# For big-data problems, the dask-backed data object used above
# may be the better choice.
r = xdata.resample(indices=idx)
r.u
r.xu

2.18 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


# Extrap model

In [120]:
reload(core)
reload(xpan_beta)

<module 'thermoextrap.xtrapy.xpan_beta' from '/Users/wpk/Documents/python/projects/thermodynamic-extrapolation/thermoextrap/xtrapy/xpan_beta.py'>

In [121]:
# test Extrap model
betas = [0.3, 0.4]
ref_beta = 0.5

In [122]:
# blank model
em = thermoextrap.ExtrapModel(maxOrder=order)

In [123]:
params = em.train(ref_beta, xData=x, uData=u, saveParams=True)

In [124]:
em.predict(betas, order=3)

array([[0.50043757, 0.49897351, 0.50042238, 0.49912602, 0.50117469],
       [0.50042413, 0.49898436, 0.50040218, 0.49912628, 0.50116313]])

In [125]:
xem = xpan_beta.factory_extrapmodel(alpha0=ref_beta, 
                                    order=order, uv=u, xv=x, xalpha=False, central=False)#, data=None)
xem.predict(betas, order=3)

In [129]:
# central
xem_c = xpan_beta.factory_extrapmodel(alpha0=ref_beta, order=order, uv=u, xv=x, xalpha=False, central=True, data=None)
xem_c.predict(betas, order=3)

In [130]:
# using other data model

# from values, raw
xdata = xpan_beta.DataStatsCovVals(xv=x, uv=u, order=order, central=False)
xem2 = xpan_beta.factory_extrapmodel(alpha0=ref_beta, data=xdata, central=False)

# from values, central
xdata_c = xpan_beta.DataStatsCovVals(xv=x, uv=u, order=order, central=True)
xem2_c = xpan_beta.factory_extrapmodel(alpha0=ref_beta, data=xdata_c, central=True)

In [127]:
xem2.predict(betas, order=3)

In [128]:
xem2_c.predict(betas, order=3)

In [254]:
# resample
em.bootstrap(betas, n=20)

array([[0.00089247, 0.0008622 , 0.00077916, 0.00071364, 0.00108004],
       [0.00089258, 0.00086074, 0.0007747 , 0.00070903, 0.00107836]])

In [131]:
idx = core.resample_indicies(len(xem.data), nrep=20)

In [132]:
# resample data
xem_r = xem.resample(indices=idx)
xem_r.predict(betas).std('rep')

In [133]:
xem_c.resample(indices=idx).predict(betas).std('rep')

In [134]:
xem2.resample(indices=idx).predict(betas).std('rep')

In [135]:
xem2_c.resample(indices=idx).predict(betas).std('rep')

In [142]:
# testing dataset
emb = thermoextrap.ExtrapModel(order, ref_beta, xb, u)

In [144]:
xemb = xpan_beta.factory_extrapmodel(order=order, alpha0=ref_beta, uv=u, xv=xb, central=True)
xem_set = xpan_beta.factory_extrapmodel(order=order, alpha0=ref_beta, uv=u, xv=x_set, central=True)

In [145]:
# Check that the new model (xemb) reproduces the old model (emb) for xb.
# The original compared emb.predict(betas) with itself, which always passes.
np.testing.assert_allclose(emb.predict(betas), xemb.predict(betas))

In [146]:
# testing dataset
out = xem.predict(betas)
outb = xemb.predict(betas)
outs = xem_set.predict(betas)

np.testing.assert_allclose(out, outs.a)
np.testing.assert_allclose(outb, outs.b)

In [149]:
# resampling
idx = core.resample_indicies(len(x), 20)

out = xem.resample(indices=idx).predict(betas)
outb = xemb.resample(indices=idx).predict(betas)
outs = xem_set.resample(indices=idx).predict(betas)

In [150]:
np.testing.assert_allclose(out, outs.a)
np.testing.assert_allclose(outb, outs.b)

In [151]:
# resampling much faster with chunking

In [152]:
%%timeit -n 1 -r 1
print(em.bootstrap(betas))

[[0.00088025 0.00089694 0.00092583 0.00088276 0.00088621]
 [0.00087916 0.00089487 0.00093036 0.00088284 0.00088431]]
4.19 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [154]:
%%timeit -n 1 -r 1
print(xem.resample(nrep=100, chunk=1000).predict(betas).std('rep'))

<xarray.DataArray (beta: 2, val: 5)>
array([[0.00086409, 0.00087791, 0.00095681, 0.00100865, 0.00084279],
       [0.00086301, 0.00087756, 0.00095509, 0.00100553, 0.00084066]])
Coordinates:
  * beta     (beta) float64 0.3 0.4
    dalpha   (beta) float64 -0.2 -0.1
    beta0    float64 0.5
Dimensions without coordinates: val
2.18 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [156]:
%%timeit -n 1 -r 1
xem2.resample(nrep=100).predict(betas).std('rep')

1.32 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


# ExtrapWeighted

In [157]:
u = np.random.rand(2,n) + 5
x = np.random.rand(2,n, nv) + 10
xb = np.random.rand(2,n, 5) + 2
order = 4

In [158]:
beta0 = [0.05, 0.5]
betas = [0.1, 0.2, 0.3, 0.4]

In [159]:
emw = thermoextrap.ExtrapWeightedModel(order, beta0, x, u)
emwB = thermoextrap.ExtrapWeightedModel(order, beta0, xb, u)

In [163]:
# for xtrapy, create list of models for each state:

# first create datasets for xs: one Dataset per state, holding
# 'a' (from x) and 'b' (from xb)
xs = [xr.Dataset({'a': core.xrwrap_xv(xx), 
                  'b': core.xrwrap_xv(xxb)})
      for xx, xxb in zip(x, xb)
     ]

# now create list of models for each state
states = [xpan_beta.factory_extrapmodel(order=order, alpha0=b0, uv=uu, xv=xx, central=True)
         for b0, uu, xx in zip(beta0, u, xs)]

xemw = core.ExtrapWeightedModel(states)

In [167]:
states_a = [xpan_beta.factory_extrapmodel(order=order, alpha0=b0, uv=uu, xv=xx, central=True)
             for b0, uu, xx in zip(beta0, u, x)]
xemw_a = core.ExtrapWeightedModel(states_a)

In [173]:
states_a[0]

<thermoextrap.xtrapy.core.ExtrapModel at 0x7faa80c7ded0>

In [168]:
np.testing.assert_allclose(emw.predict(betas), xemw.predict(betas).a)
np.testing.assert_allclose(emwB.predict(betas), xemw.predict(betas).b)

In [38]:
# Draw one set of resampling indices per state and reuse it for both weighted
# models, so their resampled predictions are directly comparable.
idxs = [core.resample_indicies(len(xemw[0].data), 20) for _ in range(2)]
a = xemw.resample(None, idxs).predict(betas).a
b = xemw_a.resample(None, idxs).predict(betas)
# 'a' in the dataset-based model is the same x data that xemw_a was built from,
# so with identical indices the two results should agree.
np.testing.assert_allclose(a, b)

In [39]:
%%timeit -n 1 -r 1
emw.bootstrap(betas, n=100)

6.4 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [72]:
%%timeit -n 1 -r 1
# note that this is actually doing twice the work
# because the data includes both x and xb
xemw.resample(100).predict(betas).std('rep')

7.37 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [73]:
%%timeit -n 1 -r 1
# note that this is actually doing twice the work
# because the data includes both x and xb
xemw.resample(100, chunk=1000).predict(betas).std('rep')

6.18 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [74]:
%%timeit -n 1 -r 1
# xemw_a is built from x only (no xb), so unlike xemw above
# it is not doing the extra work for a second observable
xemw_a.resample(100, chunk=10000).predict(betas).std('rep')

3.26 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [70]:
from dask.diagnostics import ProgressBar

In [71]:
%%timeit -n 1 -r 1
# xemw_a is built from x only (no xb);
# ProgressBar displays dask progress while the chunked resample runs
with ProgressBar():
    xemw_a.resample(500, chunk=10000).predict(betas).std('rep')

[########################################] | 100% Completed |  0.1s
[########################################] | 100% Completed |  0.6s
[########################################] | 100% Completed |  4.6s
[########################################] | 100% Completed |  0.1s
[########################################] | 100% Completed |  0.7s
[########################################] | 100% Completed |  2.5s
22.1 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


# InterpModel

In [40]:
emi = thermoextrap.InterpModel(order, beta0, x, u)
emib = thermoextrap.InterpModel(order, beta0, xb, u)

In [41]:
xemi = core.InterpModel(states)

In [42]:
states_b = [xpan_beta.factory_extrapmodel(order, b0, uu, xx, central=True)
             for b0, uu, xx in zip(beta0, u, xb)]

xemi_a = core.InterpModel(states_a)
xemi_b = core.InterpModel(states_b)


In [43]:
alpha = [0.1, 0.2]

In [44]:
np.testing.assert_allclose(emi.predict(betas), xemi.predict(betas).a)
np.testing.assert_allclose(emib.predict(betas), xemi.predict(betas).b)

In [45]:
emi.bootstrap(betas, n=20)

array([[0.00073394, 0.00082176, 0.00086818, 0.0007307 , 0.00098333],
       [0.00064477, 0.00069775, 0.00075103, 0.00060203, 0.00087918],
       [0.00067547, 0.00045663, 0.00084683, 0.00053983, 0.00074061],
       [0.00095478, 0.00059577, 0.00120691, 0.00084186, 0.0008883 ]])

In [46]:
xemi.resample(20).predict(betas).std('rep').a

In [82]:
%%timeit -n 1 -r 1
emi.bootstrap(betas, n=100)

6.77 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [83]:
%%timeit -n 1 -r 1
xemi_a.resample(100, chunk=10000).predict(betas).std('rep')

3.25 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


# MBAR

In [48]:
mbar = thermoextrap.MBARModel(refB=beta0, xData=x, uData=u)

In [49]:
mbar.predict(betas)

array([[10.49946905, 10.50009106, 10.49990332, 10.50020218, 10.50030235],
       [10.49945993, 10.50010948, 10.49990185, 10.50022119, 10.50031344],
       [10.49945098, 10.50012735, 10.49989977, 10.50024016, 10.50032404],
       [10.49944219, 10.50014468, 10.49989709, 10.50025908, 10.50033412]])

In [50]:
xmbar = core.MBARModel(states_a)

In [51]:
out = xmbar.predict(betas)

In [52]:
out

In [53]:
np.testing.assert_allclose(mbar.predict(betas), xmbar.predict(betas))

# Perturb

In [54]:
reload(core)

<module 'thermoextrap.xtrapy.core' from '/Users/wpk/Documents/python/projects/xtrapy/thermoextrap/xtrapy/core.py'>

In [55]:
beta_ref = 0.5

In [56]:
pm = thermoextrap.PerturbModel(beta_ref, x[0], u[0])

In [57]:
pm.predict([0.1, 0.2], useMBAR=False)

array([[10.49888945, 10.49920956, 10.49992106, 10.4998412 , 10.4996373 ],
       [10.4988958 , 10.49927884, 10.49991802, 10.49984427, 10.49963441]])

In [58]:
xpm = xpan_beta.factory_perturbmodel(beta_ref, u[0], x[0])

In [59]:
xpm.predict([0.1, 0.2])

# log func

In [60]:
from thermoextrap.utilities import buildAvgFuncs

#For quantities like the chemical potential, we're interested in the -log(<X>), not <X>
#Everything is the same, but we take derivatives differently
#Luckily, have closed-form expression for derivatives of -log(<X>) in terms of derivatives of <X>
#Specifically, d(n)[-log(<X>)]/dB(n) = Sum(k=1, n)[(k-1)! * (-1/<X>)^k * B(n,k,(d<X>/dB, ..., d(n-k+1)<X>/dB(n-k+1)))]
#B(n,k, (...)) represents Bell Polynomials, which are implemented in sympy (not numpy or scipy unfortunately)
#Create custom classes to handle this
from sympy import bell

class LogAvgExtrapModel(ExtrapModel):
    """Extrapolation model for -log(<X>) instead of <X>.

    Only the derivative calculation is overridden: derivatives of -log(<X>)
    are assembled from the derivatives of <X> via Bell polynomials,

        d(n)[-log(<X>)]/dB(n)
            = Sum(k=1, n)[(k-1)! * (-1/<X>)^k * B(n, k, (d<X>/dB, ...))]

    Pass the raw observable data, not its -log; the log is applied here,
    including at zeroth order.
    """

    def calcDerivVals(self, refB, x, U):
        """Compute derivatives of -log(<x>) up to ``self.maxOrder``.

        Parameters
        ----------
        refB
            Reference beta. Not used in this calculation; kept in the
            signature (presumably to match the base class -- TODO confirm).
        x
            Observable samples, indexed as ``x[sample, value]``.
        U
            Potential-energy samples; first dimension must match ``x``.

        Returns
        -------
        numpy.ndarray or None
            Array of shape ``(self.maxOrder + 1, x.shape[1])`` whose row ``o``
            holds the o-th derivative. If the first dimensions of ``x`` and
            ``U`` differ, a message is printed and ``None`` is returned.
        """
        # np.math was only an alias of the stdlib module and is removed in
        # NumPy 2.0, so use math.factorial directly.
        import math

        if x.shape[0] != U.shape[0]:
            print('First observable dimension (%i) and size of potential energy array (%i) do not match!'%(x.shape[0], U.shape[0]))
            return

        avgUfunc, avgXUfunc = buildAvgFuncs(x, U, self.maxOrder)
        # <X> itself; evaluated once and reused in every term below
        avgX = avgXUfunc(0)

        derivVals = np.zeros((self.maxOrder+1, x.shape[1]))
        # zeroth order is just -log(<X>)
        derivVals[0] = -np.log(avgX)

        for o in range(1, self.maxOrder+1):
            for k in range(1, o+1):
                # Derivatives of the average quantity, orders 1 .. o-k+1
                thisDiffs = np.array([self.derivF[l](avgUfunc, avgXUfunc) for l in range(1, o-k+2)])
                prefactor = math.factorial(k-1)
                # Apply the chain rule to each element of the observable array
                for l in range(x.shape[1]):
                    derivVals[o,l] += prefactor*((-1/avgX[l])**k)*bell(o, k, thisDiffs[:,l])

        return derivVals


In [61]:
betas

[0.1, 0.2, 0.3, 0.4]

In [62]:
xdata = xem.data.xv.values
udata = xem.data.uv.values
refBeta=0.5

In [63]:
#Create and train extrapolation model
extModelLog = LogAvgExtrapModel(maxOrder=4, refB=refBeta, 
                                xData=xdata,
                                uData=udata,
                                )

#Note that we handled the -log calculation in the definition of the derivatives (even at zeroth order).
#This means we want to just pass data, not the -log of the data.

#Check the parameters
print("Model parameters (derivatives):")
print(extModelLog.params)
print('\n')

#Finally, look at predictions
print("Model predictions:")
print(extModelLog.predict(betas, order=2))
print('\n')

Model parameters (derivatives):
[[ 6.92326220e-01  6.95158101e-01  6.92383757e-01  6.94896673e-01
   6.90846049e-01]
 [ 2.69773834e-04 -2.27673014e-04  4.07282201e-04  1.07002690e-05
   2.20961626e-04]
 [ 4.48585482e-06 -6.47472658e-05  2.02998635e-05  1.12768749e-04
  -6.90717840e-05]
 [-5.07388591e-05  5.69296180e-05 -5.54391284e-05  9.44529753e-05
  -6.20967030e-05]
 [ 1.05400889e-05  2.10718898e-05 -1.32468610e-05 -4.42767814e-05
   1.91285621e-05]]


Model predictions:
[[0.69221867 0.69524399 0.69222247 0.69490141 0.69075214]
 [0.69224549 0.69522349 0.69226249 0.69489854 0.69077665]
 [0.69227236 0.69520234 0.69230271 0.69489679 0.69080048]
 [0.69229927 0.69518054 0.69234313 0.69489617 0.69082361]]




In [64]:
xem_log = xpan_beta.factory_extrapmodel(4, refBeta, udata, xdata, 
                                            xalpha=False, central=False, minus_log=True)

In [65]:
xem_log.xcoefs(norm=False) - extModelLog.params

In [66]:
xem_log.predict(betas, order=3) - extModelLog.predict(betas, order=3)

In [67]:
%%timeit -n 1 -r 1
print(xem_log.resample(100).predict(betas,order=2).std('rep'))

<xarray.DataArray (beta: 4, val: 5)>
array([[0.00097982, 0.00096949, 0.00089208, 0.0008623 , 0.0008552 ],
       [0.00097688, 0.0009669 , 0.00089064, 0.00086231, 0.00085347],
       [0.00097458, 0.00096483, 0.00088979, 0.00086318, 0.00085273],
       [0.00097292, 0.00096326, 0.00088954, 0.00086491, 0.00085297]])
Coordinates:
  * beta     (beta) float64 0.1 0.2 0.3 0.4
    dalpha   (beta) float64 -0.4 -0.3 -0.2 -0.1
    beta0    float64 0.5
Dimensions without coordinates: val
1.91 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [68]:
%%timeit -n 1 -r 1
#And bootstrapped uncertainties
print("Bootstrapped uncertainties in predictions:")
print(extModelLog.bootstrap(betas, order=2))

Bootstrapped uncertainties in predictions:
[[0.00192916 0.00164824 0.00180508 0.00168583 0.00181047]
 [0.00192294 0.00164835 0.00180356 0.00167088 0.00180307]
 [0.0019179  0.0016503  0.00180392 0.0016578  0.00179714]
 [0.00191404 0.00165404 0.00180615 0.00164661 0.00179271]]
7.5 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
