In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from thermoextrap import *
import thermoextrap


In [2]:
# xtrapy stuff:
# note, just using single module right now.  Can retinker
import thermoextrap.xtrapy as xtrapy
import thermoextrap.xtrapy.core as core
import thermoextrap.xtrapy.xpan_beta as xpan_beta
import xarray as xr
from importlib import reload

In [3]:
order = 6
fs = [thermoextrap.symDerivAvgX(i) for i in range(order+1)]

In [55]:
# test Data
# samples
n = int(1e5)
# number of x values
nv = 5

_u_offset = 0.0
_x_offset = 0.0


np.random.seed(0)
u = np.random.rand(n) + _u_offset
x = np.random.rand(n, nv) + _x_offset

# second set, to testing
ub = np.random.rand(n) + _u_offset
xb = np.random.rand(n, nv) + _x_offset

# comparison of xtrapy

## Data averages

In [60]:
reload(core)
reload(xtrapy.data)
reload(xpan_beta)

<module 'thermoextrap.xtrapy.xpan_beta' from '/Users/wpk/Documents/python/projects/thermodynamic-extrapolation/thermoextrap/xtrapy/xpan_beta.py'>

In [None]:
xpanbeta
xpan


In [75]:
# Test "coefs"
ufunc, xufunc = thermoextrap.buildAvgFuncs(x, u, order)
coefs_list = [fs[i](ufunc, xufunc) for i in range(order+1)]

In [62]:
# coefficient class handler
# raw moments

# coefficient builder
s = xpan_beta.factory_coefs(xalpha=False, central=False)
# data object
data = xpan_beta.factory_data(u, x, order=order, chunk=1000)

In [87]:
em = thermoextrap.ExtrapModel(order, 0.5, x, u)
xem = xpan_beta.factory_extrapmodel(0.5, data)

In [None]:
xem.al

In [78]:
%psource em

In [73]:
xem.coefs.xcoefs(data, norm=False)  - em.params

In [66]:
em.params - coefs_list

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [18]:
np.array(coefs_list) - s.xcoefs(data, norm=False)

In [17]:
s.xcoefs(data, norm=False)

In [10]:
np.testing.assert_allclose(data.u, [ufunc(i) for i in range(order+1)])
np.testing.assert_allclose(data.xu, [xufunc(i) for i in range(order+1)])

In [11]:
# Note very slight difference in averages.
# this is due to numerics
print(data.u - [ufunc(i) for i in range(order+1)])
print(data.xu - [xufunc(i) for i in range(order+1)])

<xarray.DataArray 'u' (mom_u: 7)>
array([0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       5.55111512e-17, 2.77555756e-17, 0.00000000e+00])
Dimensions without coordinates: mom_u
<xarray.DataArray 'x' (mom_u: 7, val: 5)>
array([[ 1.11022302e-15,  6.66133815e-16, -3.33066907e-16,
         6.77236045e-15, -4.77395901e-15],
       [-1.49880108e-15, -3.88578059e-16,  0.00000000e+00,
        -5.55111512e-17, -1.27675648e-15],
       [ 2.77555756e-17, -1.99840144e-15,  1.44328993e-15,
         1.05471187e-15,  2.49800181e-16],
       [ 4.71844785e-16,  9.43689571e-16,  1.44328993e-15,
         1.80411242e-16, -7.21644966e-16],
       [ 1.66533454e-15, -1.04083409e-15,  1.33226763e-15,
        -7.63278329e-16,  1.98452366e-15],
       [ 3.74700271e-16, -4.85722573e-16,  4.30211422e-16,
        -1.20736754e-15, -2.77555756e-17],
       [ 2.77555756e-16,  6.52256027e-16,  4.16333634e-17,
         2.91433544e-16,  8.32667268e-16]])
Dimensions without coordinates: mom_u, val


## New data models!

have included two new data models.  These are based on the cmomy (central moment) python package.
This package handles co-moment calculation, combination, resampling, etc.
There are two base classes.  DataStatsCovVals and DataStatsCov.  The first is for 
working with individual samples (like factory_data method above).  The second is for
working with pre-averaged data (i.e., average co-moments coming from simulation). This latter class has several 
constructor methods to handle raw and central co-moments, and has xarray support to make things clear.


Note that the ordering of axes is different for 

In [46]:
# working with direct samples
# note that under the hood, numpy arrays are wrapped to xarray objects using the same method
# as above
xdata = xpan_beta.DataStatsCovVals.from_vals(uv=u, xv=x, order=order, central=False)

In [47]:
print(np.abs(xdata.xu - data.xu).max())
print(np.abs(xdata.u  - data.u).max())

<xarray.DataArray 'x' ()>
array(5.21804822e-15)
<xarray.DataArray ()>
array(6.35602682e-15)


In [48]:
# instead, we can work directly with already averaged data.
# lets pretend we have averaged data

tmp_data = xpan_beta.factory_data(
    uv = xr.DataArray(u.reshape(100, n // 100), dims=['rec','ave']),
    xv = xr.DataArray(x.reshape(100, n // 100, 5), dims=['rec','ave', 'val']),
    order=order, central=False, rec='ave', rep='rec',
)



In [59]:
# note, don't have to pass w here, as it is uniform, but will for comparison purposes
xdata2 = xpan_beta.DataStatsCov.from_vals(uv=tmp_data.uv, xv=tmp_data.xv, axis='ave', order=order, central=False)

In [103]:
# alternatively, can construct from moments
# note, since all samples have equal weight, we don't need to pass it, but good idea to
xdata3 = xpan_beta.DataStatsCov.from_ave_raw(u=tmp_data.u, xu=tmp_data.xu, w n//100)

SyntaxError: invalid syntax (<ipython-input-103-57cbcafd7357>, line 3)

In [105]:
np.abs(xdata3.values - xdata2.values).max()

In [67]:
print(np.abs(xdata2.u - tmp_data.u).max())
print(np.abs(xdata2.xu - tmp_data.xu).max())

<xarray.DataArray ()>
array(1.33226763e-15)
<xarray.DataArray ()>
array(1.77635684e-15)


In [72]:
# fakeout resample
# will resample along "rec" dimension
idx = xtrapy.data.resample_indicies(len(xdata2), nrep=20)

In [83]:
# to fake out old data model, will stack the 'rec' and 'ave' dimensions to a new dimension 'ave2'
# which we will reduce over
tmp_data_r = xpan_beta.factory_data(
    uv = tmp_data.uv[idx].stack(ave2=('rec','ave')),
    xv = tmp_data.xv[idx].stack(ave2=('rec','ave')),
    order=order, central=False, rec='ave2'
)

In [88]:
xdata2_r = xdata2.resample(indices=idx)

In [93]:
print(np.abs(xdata2_r.u - tmp_data_r.u).max())
print(np.abs(xdata2_r.xu - tmp_data_r.xu).max())

<xarray.DataArray ()>
array(3.88578059e-16)
<xarray.DataArray ()>
array(1.40998324e-14)


In [73]:
# can resample along rec
out = xdata2.resample(nrep=20, axis='rec')

In [111]:
# note that xdata 2 has still has a 'rec' dimension
# to compare, reduce along this axis
# reduce along 'rec'
xdata3 = xdata2.reduce('rec')
xdata3.xu - data.xu

In [112]:
# can also directly create this object from unaveraged values
# note that 'dims' is the final dimension names for all dimensions except mom_x, and moment
xdata4 = xpan_beta.DataStatsCov.from_vals(uv=u, xv=x, axis=0, dims=['val'], order=order)

In [117]:
print(np.abs(xdata4.u - data.u).max())
print(np.abs(xdata4.xu - data.xu).max())

<xarray.DataArray ()>
array(6.35602682e-15)
<xarray.DataArray ()>
array(5.21804822e-15)


## coefficients of expansion

In [118]:
# norm = True, include 1/ n!  factor
# if want straight coeffs, use norm=False
c = s.xcoefs(data, norm=False) 
c

In [121]:
# test equal to old way
np.testing.assert_allclose(c, coefs_list, rtol=1e-5)
# also works with "new" data model
np.testing.assert_allclose(c, s.xcoefs(xdata, norm=False))

In [127]:
# instead, can use central moments
s_c = xpan_beta.factory_coefs(central=True)

# old model
data_c = xpan_beta.factory_data(u, x, order=order, central=True)
c_c = s_c.xcoefs(data_c, norm=False)

# new model
xdata_c = xpan_beta.DataStatsCovVals.from_vals(uv=u, xv=x, order=order, central=True)
xc_c = s_c.xcoefs(xdata_c, norm=False)

# test close
np.testing.assert_allclose(s.xcoefs(data), s_c.xcoefs(data_c))
np.testing.assert_allclose(s.xcoefs(data), s_c.xcoefs(xdata_c))


# nice-ness of central moments

In [128]:
# coeffs using raw moments u[i], xu[i]
for i in range(order+1):
    print('{}: {}'.format(i, s.exprs[i]))

0: xu[0]
1: u[1]*xu[0] - xu[1]
2: 2*u[1]**2*xu[0] - 2*u[1]*xu[1] - u[2]*xu[0] + xu[2]
3: 6*u[1]**3*xu[0] - 6*u[1]**2*xu[1] - 6*u[1]*u[2]*xu[0] + 3*u[1]*xu[2] + 3*u[2]*xu[1] + u[3]*xu[0] - xu[3]
4: 24*u[1]**4*xu[0] - 24*u[1]**3*xu[1] - 36*u[1]**2*u[2]*xu[0] + 12*u[1]**2*xu[2] + 24*u[1]*u[2]*xu[1] + 8*u[1]*u[3]*xu[0] - 4*u[1]*xu[3] + 6*u[2]**2*xu[0] - 6*u[2]*xu[2] - 4*u[3]*xu[1] - u[4]*xu[0] + xu[4]
5: 120*u[1]**5*xu[0] - 120*u[1]**4*xu[1] - 240*u[1]**3*u[2]*xu[0] + 60*u[1]**3*xu[2] + 180*u[1]**2*u[2]*xu[1] + 60*u[1]**2*u[3]*xu[0] - 20*u[1]**2*xu[3] + 90*u[1]*u[2]**2*xu[0] - 60*u[1]*u[2]*xu[2] - 40*u[1]*u[3]*xu[1] - 10*u[1]*u[4]*xu[0] + 5*u[1]*xu[4] - 30*u[2]**2*xu[1] - 20*u[2]*u[3]*xu[0] + 10*u[2]*xu[3] + 10*u[3]*xu[2] + 5*u[4]*xu[1] + u[5]*xu[0] - xu[5]
6: 720*u[1]**6*xu[0] - 720*u[1]**5*xu[1] - 1800*u[1]**4*u[2]*xu[0] + 360*u[1]**4*xu[2] + 1440*u[1]**3*u[2]*xu[1] + 480*u[1]**3*u[3]*xu[0] - 120*u[1]**3*xu[3] + 1080*u[1]**2*u[2]**2*xu[0] - 540*u[1]**2*u[2]*xu[2] - 360*u[1]**2*u[3]*xu[1]

In [129]:
# coeffs using central moments du[i] = <(u-<u>)**i> 
for i in range(order+1):
    print('{}: {}'.format(i, s_c.exprs[i]))

0: x1
1: -dxdu[1]
2: dxdu[2]
3: 3*du[2]*dxdu[1] - dxdu[3]
4: -6*du[2]*dxdu[2] - 4*du[3]*dxdu[1] + dxdu[4]
5: -30*du[2]**2*dxdu[1] + 10*du[2]*dxdu[3] + 10*du[3]*dxdu[2] + 5*du[4]*dxdu[1] - dxdu[5]
6: 90*du[2]**2*dxdu[2] + 120*du[2]*du[3]*dxdu[1] - 15*du[2]*dxdu[4] - 20*du[3]*dxdu[3] - 15*du[4]*dxdu[2] - 6*du[5]*dxdu[1] + dxdu[6]


In [130]:
# so not only are central moments more stable, the resulting expressions
# are way simplier!

# taking advantage of xarray

In [131]:
# NOTE: have not implemented this for StatsCov objects yet
# TODO: implement dataset stuff for StatsCov objects...

In [134]:
# make a dataset of two values:

x_set = xr.merge((xtrapy.data.xrwrap_xv(xx, name=name) 
                  for xx, name in zip([x, xb], ['a','b'])))
print(x_set)

data_set = xpan_beta.factory_data(u, x_set, order)
data_set_c = xpan_beta.factory_data(u, x_set, order, central=True)

# need "other" x for comparison
datab = xpan_beta.factory_data(u, xb, order)
xdatab = xpan_beta.DataStatsCovVals.from_vals(uv=u, xv=xb, order=order, central=False)

datab_c = xpan_beta.factory_data(u, xb, order, central=True)
xdatab_c = xpan_beta.DataStatsCovVals.from_vals(uv=u, xv=xb, order=order, central=True)

<xarray.Dataset>
Dimensions:  (rec: 100000, val: 5)
Dimensions without coordinates: rec, val
Data variables:
    a        (rec, val) float64 0.5353 0.904 0.5024 ... 0.7553 0.2138 0.7261
    b        (rec, val) float64 0.6237 0.1782 0.2007 ... 0.08856 0.0782 0.4235


In [135]:
# note that data_set has multiple measurements in it.  Very nice...
print(data_set.xv)
print(data_set.xu)

<xarray.Dataset>
Dimensions:  (rec: 100000, val: 5)
Dimensions without coordinates: rec, val
Data variables:
    a        (rec, val) float64 0.5353 0.904 0.5024 ... 0.7553 0.2138 0.7261
    b        (rec, val) float64 0.6237 0.1782 0.2007 ... 0.08856 0.0782 0.4235
<xarray.Dataset>
Dimensions:  (mom_u: 7, val: 5)
Dimensions without coordinates: mom_u, val
Data variables:
    a        (mom_u, val) float64 0.5004 0.499 0.5004 ... 0.07138 0.07166
    b        (mom_u, val) float64 0.5001 0.5015 0.5001 ... 0.07127 0.07149


In [136]:
ufuncb, xufuncb = thermoextrap.buildAvgFuncs(xb, u, order)
coefs_listb = [fs[i](ufuncb, xufuncb) for i in range(order+1)]

In [137]:
# make sure have same value
np.testing.assert_allclose(s.xcoefs(datab,  norm=False), coefs_listb)

In [138]:
# consider dataset
c_set = s.xcoefs(data_set, norm=False)
c_set

In [139]:
# testing
cb = s.xcoefs(datab, norm=False)
np.testing.assert_allclose(c_set.a, c)
np.testing.assert_allclose(c_set.b, cb)

In [140]:
# central
c_set_c = s_c.xcoefs(data_set_c, norm=False)
# test
xr.testing.assert_allclose(c_set, c_set_c)

# resampling

In [141]:
# explicit resample index

In [143]:
idx = xtrapy.data.resample_indicies(size=len(data), nrep=10)

out = data.resample(indices=idx)

xout = xdata.resample(indices=idx)

xr.testing.assert_allclose(xout.xu.transpose(*out.xu.dims),out.xu)

In [146]:
# testing resampling:
# use same resampler for all
idx = xtrapy.data.resample_indicies(len(data), nrep=10)

In [147]:
c_r = s.xcoefs(data.resample(indices=idx))
cb_r = s.xcoefs(datab.resample(indices=idx))
cs_r = s.xcoefs(data_set.resample(indices=idx))

In [148]:
np.testing.assert_allclose(cs_r.a, c_r)
np.testing.assert_allclose(cs_r.b, cb_r)

In [149]:
# center
cs_c_r = s_c.xcoefs(data_set_c.resample(indices=idx))

In [150]:
xr.testing.assert_allclose(cs_r, cs_c_r)

# resampling time and chunking

In [151]:
# xarray is built on dask.  I'm no expert, that this speed things up
# use chunking to create a dask array of uv, xv

In [152]:
idx = xtrapy.data.resample_indicies(len(data), nrep=200)

In [153]:
%%timeit -n 1 -r 1
r = data.resample(indices=idx, chunk=None)
r.u
r.xu

3.3 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [154]:
%%timeit -n 1 -r 1
r = data.resample(indices=idx, chunk=10000)
r.u
r.xu

2.9 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [155]:
%%timeit -n 1 -r 1
# note that xdata does not use chunking
# but for reasonably sized data, is faster
# might want to use the dask backended other method
# for big data problems.
r = xdata.resample(indices=idx)
r.u
r.xu

2.21 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


# Extrap model

In [52]:
reload(xtrapy.data)
reload(core)
reload(xpan_beta)

<module 'thermoextrap.xtrapy.xpan_beta' from '/Users/wpk/Documents/python/projects/thermodynamic-extrapolation/thermoextrap/xtrapy/xpan_beta.py'>

In [53]:
# test Extrap model
betas = [0.3, 0.4]
ref_beta = 0.5

In [56]:
# blank model
em = thermoextrap.ExtrapModel(maxOrder=order)

params = em.train(ref_beta, xData=x, uData=u, saveParams=True)

em.predict(betas, order=3)

array([[0.50043757, 0.49897351, 0.50042238, 0.49912602, 0.50117469],
       [0.50042413, 0.49898436, 0.50040218, 0.49912628, 0.50116313]])

In [57]:
%psource em

In [58]:
# Note
# with the new data models being available
# decided to have the data creation completely separate from 
# model creation
data = xpan_beta.factory_data(uv=u, xv=x, order=order, central=False)

In [59]:
# note that you can specify order, central, xalpha directly
# otherwise, this info is inferred from data object
xem = xpan_beta.factory_extrapmodel(alpha0=ref_beta, data=data)
xem.predict(betas, order=3)

In [60]:
xem.predict(betas, order=3) - em.predict(betas, order=3)

In [176]:
# central
cdata = xpan_beta.factory_data(uv=u, xv=x, order=order, central=True)
xem_c = xpan_beta.factory_extrapmodel(alpha0=ref_beta, data=cdata)
xem_c.predict(betas, order=3)


In [177]:
# using other data model

# from values, raw
xdata = xpan_beta.DataStatsCovVals.from_vals(xv=x, uv=u, order=order, central=False)
xem2 = xpan_beta.factory_extrapmodel(alpha0=ref_beta, data=xdata)

# from values, central
xdata_c = xpan_beta.DataStatsCovVals.from_vals(xv=x, uv=u, order=order, central=True)
xem2_c = xpan_beta.factory_extrapmodel(alpha0=ref_beta, data=xdata_c, central=True)

In [178]:
xem2.predict(betas, order=3)

In [179]:
xem2_c.predict(betas, order=3)

In [254]:
# resample
em.bootstrap(betas, n=20)

array([[0.00089247, 0.0008622 , 0.00077916, 0.00071364, 0.00108004],
       [0.00089258, 0.00086074, 0.0007747 , 0.00070903, 0.00107836]])

In [50]:
a = em.bootstrap(betas, n=10, order=3)

In [51]:
b = xem.resample(nrep=10).predict(betas, order=3).std('rep')

In [55]:
np.testing.assert_allclose(a, b, rtol=0.1, atol=0.1)

In [181]:
idx = xpan_beta.resample_indicies(len(xem.data), nrep=20)

In [183]:
# resample data
xem_r = xem.resample(indices=idx)
xem_r.predict(betas).std('rep')

In [184]:
# pipeline
(
    xem_c
    .resample(indices=idx)
    .predict(betas)
    .std('rep')
)

In [185]:
# pipeline
(
    xem2
    .resample(indices=idx)
    .predict(betas)
    .std('rep')
)

In [186]:
# pipeline
(
    xem2_c
    .resample(indices=idx)
    .predict(betas)
    .std('rep')
)

In [199]:
# blank model
emb = thermoextrap.ExtrapModel(maxOrder=order)

paramsb = emb.train(ref_beta, xData=xb, uData=u, saveParams=True)

emb.predict(betas, order=3)

array([[0.50020531, 0.50140101, 0.50011405, 0.5000504 , 0.50035491],
       [0.50016636, 0.5014265 , 0.5000875 , 0.50006491, 0.50036642]])

In [193]:
datab = xpan_beta.factory_data(uv=u, xv=xb, central=True, order=order)
data_set = xpan_beta.factory_data(uv=u, xv=x_set, central=True, order=order)

In [194]:
xemb = xpan_beta.factory_extrapmodel(alpha0=ref_beta, data=datab)
xem_set = xpan_beta.factory_extrapmodel(alpha0=ref_beta, data=data_set)

In [206]:
np.testing.assert_allclose(emb.predict(betas), xemb.predict(betas))

In [209]:
# testing dataset
out = xem.predict(betas)
outb = xemb.predict(betas)
outs = xem_set.predict(betas)

np.testing.assert_allclose(out, outs.a)
np.testing.assert_allclose(outb, outs.b)

In [212]:
# resampling
idx = xpan_beta.resample_indicies(len(x), 20)

out = xem.resample(indices=idx).predict(betas)
outb = xemb.resample(indices=idx).predict(betas)
outs = xem_set.resample(indices=idx).predict(betas)

In [213]:
np.testing.assert_allclose(out, outs.a)
np.testing.assert_allclose(outb, outs.b)

In [214]:
# resampling much faster with chunking

In [215]:
%%timeit -n 1 -r 1
print(em.bootstrap(betas))

[[0.00078462 0.00107165 0.00095946 0.00081731 0.00095329]
 [0.00078408 0.00107141 0.00096099 0.00081307 0.00095403]]
4.61 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [216]:
%%timeit -n 1 -r 1
print(xem.resample(nrep=100, chunk=1000).predict(betas).std('rep'))

<xarray.DataArray (beta: 2, val: 5)>
array([[0.00085433, 0.00090198, 0.00088924, 0.00090106, 0.00086705],
       [0.00085323, 0.00089672, 0.00089113, 0.0009027 , 0.00086764]])
Coordinates:
  * beta     (beta) float64 0.3 0.4
    dalpha   (beta) float64 -0.2 -0.1
    beta0    float64 0.5
Dimensions without coordinates: val
2.03 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [217]:
%%timeit -n 1 -r 1
xem2.resample(nrep=100).predict(betas).std('rep')

1.38 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


# ExtrapWeighted

In [5]:
u = np.random.rand(2,n) + 5
x = np.random.rand(2,n, nv) + 10
xb = np.random.rand(2,n, 5) + 2
order = 4

In [6]:
reload(core)

<module 'thermoextrap.xtrapy.core' from '/Users/wpk/Documents/python/projects/thermodynamic-extrapolation/thermoextrap/xtrapy/core.py'>

In [7]:
beta0 = [0.05, 0.5]
betas = [0.1, 0.2, 0.3, 0.4]

In [8]:
emw = thermoextrap.ExtrapWeightedModel(order, beta0, x, u)
emwB = thermoextrap.ExtrapWeightedModel(order, beta0, xb, u)

In [9]:
# for xtrapy, create list of models for each state:

# first create datasets for xs
xs = [xr.Dataset({'a': xtrapy.data.xrwrap_xv(xx, val='vala'), 
                  'b': xtrapy.data.xrwrap_xv(xxb, val='valb')})
      for xx, xxb in zip(x, xb)
     ]

# now crete list of models for each state
states = [xpan_beta.factory_extrapmodel(alpha0=b0, 
                                        data=xpan_beta.factory_data(uv=uu, xv=xx, central=True, order=order)
#                                        data=xpan_beta.DataStatsCovVals.from_vals(uv=uu, xv=xx, central=True, order=order)
                                       )
         for b0, uu, xx in zip(beta0, u, xs)]

xemw = core.ExtrapWeightedModel(states)

In [10]:
emw.predict(betas, order=3)

array([[10.50040962, 10.49939258, 10.50091405, 10.50029923, 10.49901318],
       [10.50044288, 10.49942658, 10.50090643, 10.50027989, 10.499016  ],
       [10.49887528, 10.4995365 , 10.49875984, 10.50028556, 10.49895176],
       [10.49888078, 10.4995398 , 10.4987535 , 10.50026924, 10.49896069]])

In [11]:
xemw.predict(betas, order=3).a

In [326]:
states_xa = [xpan_beta.factory_extrapmodel(alpha0=b0, 
                                          data=xpan_beta.DataStatsCovVals.from_vals(uv=uu, xv=xx, val='vala', central=True, order=order)
                                         )
             for b0, uu, xx in zip(beta0, u, x)]
xemw_xa = core.ExtrapWeightedModel(states_xa)

states_a = [xpan_beta.factory_extrapmodel(alpha0=b0, 
                                          data=xpan_beta.factory_data(uv=uu, xv=xx, val='vala', central=True, order=order)
                                         )
             for b0, uu, xx in zip(beta0, u, x)]
xemw_a = core.ExtrapWeightedModel(states_a)

In [328]:
np.testing.assert_allclose(emw.predict(betas), xemw_a.predict(betas))
np.testing.assert_allclose(emw.predict(betas), xemw_xa.predict(betas))

In [289]:
np.testing.assert_allclose(emw.predict(betas), xemw.predict(betas).a)
np.testing.assert_allclose(emwB.predict(betas), xemw.predict(betas).b)

In [290]:
idxs = [xpan_beta.resample_indicies(len(xemw[0].data), 20) for i in range(2)]
out = xemw.resample(indices=idxs, nrep=None).predict(betas)
out_a = xemw_a.resample(indices=idxs, nrep=None).predict(betas)

In [315]:
xemw_r = core.ExtrapWeightedModel([s.resample(indices=idx) for s, idx in zip(states, idxs)])

In [319]:
xr.testing.assert_allclose(xemw_r.predict(betas), out)

In [321]:
%%timeit -n 1 -r 1
emw.bootstrap(betas, n=100)

5.75 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [323]:
%%timeit -n 1 -r 1
# note that is is actually doing twice the work
# because data includes both x and xb
xemw.resample(nrep=100).predict(betas).std('rep')

8.37 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [324]:
%%timeit -n 1 -r 1
# note that is is actually doing twice the work
# because data includes both x and xb
xemw.resample(nrep=100, chunk=1000).predict(betas).std('rep')

4.95 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [325]:
%%timeit -n 1 -r 1
# note that is is actually doing twice the work
# because data includes both x and xb
xemw_a.resample(nrep=100, chunk=10000).predict(betas).std('rep')

2.74 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [330]:
%%timeit -n 1 -r 1
# note that is is actually doing twice the work
# because data includes both x and xb
xemw_xa.resample(nrep=100).predict(betas).std('rep')

1.52 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [333]:
%%timeit -n 1 -r 1
# note that is is actually doing twice the work
# because data includes both x and xb
xemw_xa.resample(nrep=500).predict(betas).std('rep')

7.19 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [331]:
from dask.diagnostics import ProgressBar

In [332]:
%%timeit -n 1 -r 1
# note that is is actually doing twice the work
# because data includes both x and xb
with ProgressBar():
    xemw_a.resample(nrep=500, chunk=10000).predict(betas).std('rep')

[########################################] | 100% Completed |  0.2s
[########################################] | 100% Completed |  0.4s
[########################################] | 100% Completed |  3.0s
[########################################] | 100% Completed |  0.2s
[########################################] | 100% Completed |  0.5s
[########################################] | 100% Completed |  2.3s
17.1 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


# InterpModel

In [22]:
emi = thermoextrap.InterpModel(3, beta0, x, u)
#emib = thermoextrap.InterpModel(order, beta0, xb, u)

In [13]:
xemi = core.InterpModel(states)

In [24]:
emi.predict(betas, order=3)

array([[10.50040307, 10.49939287, 10.50090532, 10.50029934, 10.49901291],
       [10.50016155, 10.49943834, 10.50053273, 10.50028463, 10.49900443],
       [10.49946882, 10.49949745, 10.49956593, 10.50027749, 10.49897911],
       [10.49895775, 10.4995351 , 10.4988576 , 10.50026814, 10.49896415]])

In [25]:
xemi.predict(betas, order=3).a

In [None]:
xpan_beta.factory_coefs()

In [338]:
states_b = [xpan_beta.factory_extrapmodel(alpha0=b0, 
                                          data=xpan_beta.DataStatsCovVals.from_vals(uv=uu, xv=xx, central=True, order=order)
                                         )
             for b0, uu, xx in zip(beta0, u, xb)]

xemi_a = core.InterpModel(states_a)
xemi_b = core.InterpModel(states_b)


In [339]:
alpha = [0.1, 0.2]

In [340]:
np.testing.assert_allclose(emi.predict(betas), xemi.predict(betas).a)
np.testing.assert_allclose(emib.predict(betas), xemi.predict(betas).b)

In [341]:
emi.bootstrap(betas, n=20)

array([[0.00063353, 0.00095567, 0.00074263, 0.00086128, 0.00100507],
       [0.00053849, 0.00083358, 0.00066747, 0.00071959, 0.00089027],
       [0.0005003 , 0.00078429, 0.00082489, 0.00061902, 0.00084559],
       [0.00070869, 0.00106836, 0.00116987, 0.00091483, 0.00110888]])

In [343]:
xemi.resample(nrep=20).predict(betas).std('rep').a

In [344]:
%%timeit -n 1 -r 1
emi.bootstrap(betas, n=100)

6 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [346]:
%%timeit -n 1 -r 1
xemi_a.resample(nrep=100, chunk=10000).predict(betas).std('rep')

2.65 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


# MBAR

In [26]:
reload(core)

<module 'thermoextrap.xtrapy.core' from '/Users/wpk/Documents/python/projects/thermodynamic-extrapolation/thermoextrap/xtrapy/core.py'>

In [35]:
x.shape

(2, 100000, 5)

In [36]:
beta0

[0.05, 0.5]

In [27]:
mbar = thermoextrap.MBARModel(refB=beta0, xData=x, uData=u)

In [28]:
mbar.predict(betas)

array([[10.49959951, 10.49942618, 10.49983521, 10.50031363, 10.49898134],
       [10.49962649, 10.49944425, 10.49984089, 10.50029625, 10.49898654],
       [10.49965427, 10.4994628 , 10.49984647, 10.50027784, 10.49899192],
       [10.49968284, 10.49948183, 10.49985198, 10.5002584 , 10.49899746]])

In [30]:
states_a = [xpan_beta.factory_extrapmodel(alpha0=b0, 
                                          data=xpan_beta.DataCentralMomentsVals.from_vals(uv=uu, xv=xx, central=True, order=order)
#                                          data=xpan_beta.factory_data(uv=uu, xv=xx, central=True, order=order)
                                         )
             for b0, uu, xx in zip(beta0, u, x)]

  entrypoints.init_all()


In [31]:
xmbar = core.MBARModel(states_a)

In [32]:
out = xmbar.predict(betas)

In [33]:
np.testing.assert_allclose(mbar.predict(betas), xmbar.predict(betas))

# Perturb

In [43]:
reload(core)
reload(xpan_beta)

<module 'thermoextrap.xtrapy.xpan_beta' from '/Users/wpk/Documents/python/projects/thermodynamic-extrapolation/thermoextrap/xtrapy/xpan_beta.py'>

In [44]:
beta_ref = 0.5

In [48]:
beta_ref

0.5

In [45]:
pm = thermoextrap.PerturbModel(beta_ref, x[0], u[0])

In [46]:
pm.predict([0.1, 0.2], useMBAR=False)

array([[10.50027714, 10.49924154, 10.50094327, 10.50037793, 10.49901168],
       [10.50030413, 10.49927482, 10.50093759, 10.50036169, 10.49901039]])

In [47]:
xpm = xpan_beta.factory_perturbmodel(beta_ref, u[0], x[0])

In [42]:
xpm.predict([0.1, 0.2])

# log func

In [61]:
from thermoextrap.utilities import buildAvgFuncs

#For quantities like the chemical potential, we're interested in the -log(<X>), not <X>
#Everything is the same, but we take derivatives differently
#Luckily, have closed-form expression for derivatives of -log(<X>) in terms of derivatives of <X>
#Specifically, d(n)[-log(<X>)]/dB(n) = Sum(k=1, n)[(k-1)! * (-1/<X>)^k * B(n,k,(d<X>/dB, ..., d(n-k+1)<X>/dB(n-k+1)))]
#B(n,k, (...)) represents Bell Polynomials, which are implemented in sympy (not numpy or scipy unfortunately)
#Create custom classes to handle this
from sympy import bell

class LogAvgExtrapModel(ExtrapModel):
    
    def calcDerivVals(self, refB, x, U):
        
        if x.shape[0] != U.shape[0]:
            print('First observable dimension (%i) and size of potential energy array (%i) do not match!'%(x.shape[0], U.shape[0]))
            return
    
        avgUfunc, avgXUfunc = buildAvgFuncs(x, U, self.maxOrder)
        derivVals = np.zeros((self.maxOrder+1, x.shape[1]))
        for o in range(self.maxOrder+1):
            if o == 0:
                derivVals[o] = (-np.log(avgXUfunc(0)))
                continue
            for k in range(1,o+1):
                #Get the derivatives of the average quantity
                thisDiffs = np.array([self.derivF[l](avgUfunc, avgXUfunc) for l in range(1, o-k+2)])
                #Loop to apply the chain rule to each element of the observable array
                for l in range(x.shape[1]):
                    derivVals[o,l] += np.math.factorial(k-1)*((-1/avgXUfunc(0)[l])**k)*bell(o, k, thisDiffs[:,l])
                
        return derivVals


In [70]:
xdata = xem.data.xv.values
udata = xem.data.uv.values
refBeta=0.5

In [71]:
#Create and train extrapolation model
extModelLog = LogAvgExtrapModel(maxOrder=4, refB=refBeta, 
                                xData=xdata,
                                uData=udata,
                                )

#Note that we handled the -log calculation in the definition of the derivatives (even at zeroth order).
#This means we want to just pass data, not the -log of the data.

#Check the parameters
print("Model parameters (derivatives):")
print(extModelLog.params)
print('\n')

#Finally, look at predictions
print("Model predictions:")
print(extModelLog.predict(betas, order=2))
print('\n')

Model parameters (derivatives):
[[ 6.92326220e-01  6.95158101e-01  6.92383757e-01  6.94896673e-01
   6.90846049e-01]
 [ 2.69773834e-04 -2.27673014e-04  4.07282201e-04  1.07002690e-05
   2.20961626e-04]
 [ 4.48585482e-06 -6.47472658e-05  2.02998635e-05  1.12768749e-04
  -6.90717840e-05]
 [-5.07388591e-05  5.69296180e-05 -5.54391284e-05  9.44529753e-05
  -6.20967030e-05]
 [ 1.05400889e-05  2.10718898e-05 -1.32468610e-05 -4.42767814e-05
   1.91285621e-05]]


Model predictions:
[[0.69227236 0.69520234 0.69230271 0.69489679 0.69080048]
 [0.69229927 0.69518054 0.69234313 0.69489617 0.69082361]]




In [76]:
xem_log = xpan_beta.factory_extrapmodel(
    alpha0=refBeta, 
    data=xpan_beta.factory_data(uv=udata, xv=xdata, order=4, central=False),
    #data=xpan_beta.DataCentralMomentsVals.from_vals(order=4, uv=udata, xv=xdata, central=False),
    minus_log=True)


In [85]:
extModelLog.params

array([[ 6.92326220e-01,  6.95158101e-01,  6.92383757e-01,
         6.94896673e-01,  6.90846049e-01],
       [ 2.69773834e-04, -2.27673014e-04,  4.07282201e-04,
         1.07002690e-05,  2.20961626e-04],
       [ 4.48585482e-06, -6.47472658e-05,  2.02998635e-05,
         1.12768749e-04, -6.90717840e-05],
       [-5.07388591e-05,  5.69296180e-05, -5.54391284e-05,
         9.44529753e-05, -6.20967030e-05],
       [ 1.05400889e-05,  2.10718898e-05, -1.32468610e-05,
        -4.42767814e-05,  1.91285621e-05]])

In [86]:
xem_log.coefs.xcoefs(xem_log.data, norm=True, minus_log=True)

In [81]:
np.testing.assert_allclose(xem_log.predict(betas, order=4), extModelLog.predict(betas, order=4))

In [389]:
xem_log.xcoefs(norm=False) - extModelLog.params

In [66]:
xem_log.predict(betas, order=3) - extModelLog.predict(betas, order=3)

In [67]:
%%timeit -n 1 -r 1
print(xem_log.resample(100).predict(betas,order=2).std('rep'))

<xarray.DataArray (beta: 4, val: 5)>
array([[0.00097982, 0.00096949, 0.00089208, 0.0008623 , 0.0008552 ],
       [0.00097688, 0.0009669 , 0.00089064, 0.00086231, 0.00085347],
       [0.00097458, 0.00096483, 0.00088979, 0.00086318, 0.00085273],
       [0.00097292, 0.00096326, 0.00088954, 0.00086491, 0.00085297]])
Coordinates:
  * beta     (beta) float64 0.1 0.2 0.3 0.4
    dalpha   (beta) float64 -0.4 -0.3 -0.2 -0.1
    beta0    float64 0.5
Dimensions without coordinates: val
1.91 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [68]:
%%timeit -n 1 -r 1
#And bootstrapped uncertainties
print("Bootstrapped uncertainties in predictions:")
print(extModelLog.bootstrap(betas, order=2))

Bootstrapped uncertainties in predictions:
[[0.00192916 0.00164824 0.00180508 0.00168583 0.00181047]
 [0.00192294 0.00164835 0.00180356 0.00167088 0.00180307]
 [0.0019179  0.0016503  0.00180392 0.0016578  0.00179714]
 [0.00191404 0.00165404 0.00180615 0.00164661 0.00179271]]
7.5 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
