In [2]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from thermoextrap import *
import thermoextrap


In [3]:
# xtrapy stuff:
# note, just using single module right now.  Can retinker
import thermoextrap.xtrapy.core as core
import xarray as xr

In [4]:
from importlib import reload

In [6]:
# highest derivative order used in the comparisons below
order = 6
# one legacy symbolic-derivative object per order 0..order
fs = list(map(thermoextrap.symDerivAvgX, range(order + 1)))

In [58]:
# Synthetic test data: standard-normal draws, optionally shifted by an offset.
n = 100000  # number of samples
nv = 5      # number of x values per sample

_u_offset = 0.0
_x_offset = 0.0

# fixed seed so the four draws below are reproducible (draw order matters)
np.random.seed(0)
u = _u_offset + np.random.standard_normal(n)
x = _x_offset + np.random.standard_normal((n, nv))

# second, independent set used for testing
ub = _u_offset + np.random.standard_normal(n)
xb = _x_offset + np.random.standard_normal((n, nv))

# comparison of xtrapy

## Data averages

In [22]:
# Legacy reference coefficients: build the two helper functions from the
# samples, then evaluate every entry of `fs` (orders 0..order) with them.
ufunc, xufunc = thermoextrap.buildAvgFuncs(x, u, order)
coefs_list = [deriv(ufunc, xufunc) for deriv in fs[:order + 1]]

In [12]:
# xtrapy counterpart of the legacy helpers, here based on raw moments
# (a central-moment variant is built further down with central=True).

# coefficient builder (default arguments)
s = core.factory_coefs()
# data object built from the u and x samples up to `order`;
# chunk=1000 presumably sets the chunk size along the sample axis -- confirm in core
data = core.factory_data(u, x, order=order, chunk=1000)

In [18]:
# Compare the xtrapy moments (data.u, data.xu) with the legacy functions
# evaluated at every order 0..order.
# The printed differences are not exactly zero, only at floating-point
# round-off level, i.e. they are purely numerical.
print(data.u - [ufunc(i) for i in range(order+1)])
print(data.xu - [xufunc(i) for i in range(order+1)])

<xarray.DataArray (moment: 7)>
array([ 0.00000000e+00, -1.73472348e-18,  2.22044605e-16,  1.56125113e-17,
       -4.44089210e-16,  4.16333634e-17,  0.00000000e+00])
Dimensions without coordinates: moment
<xarray.DataArray (moment: 7, val: 5)>
array([[-2.31748214e-18, -1.95156391e-17, -1.95156391e-17,
        -2.16840434e-17,  3.64291930e-17],
       [ 3.68628739e-17,  3.79470760e-17, -2.60208521e-18,
        -2.64545330e-17,  2.51534904e-17],
       [ 3.38271078e-17, -1.21430643e-17,  2.77555756e-17,
        -5.37764278e-17, -3.90312782e-18],
       [-1.04083409e-17, -1.37043155e-16,  3.22008045e-17,
        -9.54097912e-17,  1.21430643e-17],
       [-1.80411242e-16,  1.38777878e-17,  1.97064587e-15,
        -3.52148866e-16,  2.35922393e-16],
       [-1.13797860e-15,  9.71445147e-17,  1.11022302e-16,
         1.34614542e-15,  3.19189120e-16],
       [ 2.69229083e-15,  0.00000000e+00, -2.55351296e-15,
         1.11022302e-16,  3.55271368e-15]])
Dimensions without coordinates: moment, val

## coefficients of expansion

In [103]:
# Expansion coefficients from the raw-moment data.
# norm=True includes the 1/n! factor in each coefficient;
# norm=False gives the bare coefficients, which is what the legacy list holds.
c = s.xcoefs(data, norm=False) 
c

In [104]:
# The xtrapy coefficients must match the legacy `coefs_list`
# (relative tolerance 1e-5).
np.testing.assert_allclose(c, coefs_list, rtol=1e-5)

In [105]:
# Alternative formulation: build both the coefficient builder and the data
# object with central=True so that central moments are used instead of raw ones.
s_c = core.factory_coefs(central=True)
data_c = core.factory_data(u, x, order=order, central=True)

# bare (norm=False) coefficients from the central-moment objects
c_c = s_c.xcoefs(data_c, norm=False)
c_c

In [106]:
# Raw-moment and central-moment coefficients must agree.
# (The objects are named `s_c` / `data_c`; the previous `sc` / `datac`
# were undefined and raised NameError.)
np.testing.assert_allclose(s.xcoefs(data), s_c.xcoefs(data_c))

# nice-ness of central moments

In [107]:
# coeffs using raw moments u[i], xu[i]
# one "<order>: <expression>" line per order 0..order
template = '{}: {}'
for i in range(1 + order):
    print(template.format(i, s.exprs[i]))

0: xu[0]
1: u[1]*xu[0] - xu[1]
2: 2*u[1]**2*xu[0] - 2*u[1]*xu[1] - u[2]*xu[0] + xu[2]
3: 6*u[1]**3*xu[0] - 6*u[1]**2*xu[1] - 6*u[1]*u[2]*xu[0] + 3*u[1]*xu[2] + 3*u[2]*xu[1] + u[3]*xu[0] - xu[3]
4: 24*u[1]**4*xu[0] - 24*u[1]**3*xu[1] - 36*u[1]**2*u[2]*xu[0] + 12*u[1]**2*xu[2] + 24*u[1]*u[2]*xu[1] + 8*u[1]*u[3]*xu[0] - 4*u[1]*xu[3] + 6*u[2]**2*xu[0] - 6*u[2]*xu[2] - 4*u[3]*xu[1] - u[4]*xu[0] + xu[4]
5: 120*u[1]**5*xu[0] - 120*u[1]**4*xu[1] - 240*u[1]**3*u[2]*xu[0] + 60*u[1]**3*xu[2] + 180*u[1]**2*u[2]*xu[1] + 60*u[1]**2*u[3]*xu[0] - 20*u[1]**2*xu[3] + 90*u[1]*u[2]**2*xu[0] - 60*u[1]*u[2]*xu[2] - 40*u[1]*u[3]*xu[1] - 10*u[1]*u[4]*xu[0] + 5*u[1]*xu[4] - 30*u[2]**2*xu[1] - 20*u[2]*u[3]*xu[0] + 10*u[2]*xu[3] + 10*u[3]*xu[2] + 5*u[4]*xu[1] + u[5]*xu[0] - xu[5]
6: 720*u[1]**6*xu[0] - 720*u[1]**5*xu[1] - 1800*u[1]**4*u[2]*xu[0] + 360*u[1]**4*xu[2] + 1440*u[1]**3*u[2]*xu[1] + 480*u[1]**3*u[3]*xu[0] - 120*u[1]**3*xu[3] + 1080*u[1]**2*u[2]**2*xu[0] - 540*u[1]**2*u[2]*xu[2] - 360*u[1]**2*u[3]*xu[1]

In [66]:
# coeffs using central moments du[i] = <(u-<u>)**i> 

# one "<order>: <expression>" line per order 0..order, from the central builder
template = '{}: {}'
for i in range(1 + order):
    print(template.format(i, s_c.exprs[i]))

0: x1
1: -dxdu[1]
2: dxdu[2]
3: 3*du[2]*dxdu[1] - dxdu[3]
4: -6*du[2]*dxdu[2] - 4*du[3]*dxdu[1] + dxdu[4]
5: -30*du[2]**2*dxdu[1] + 10*du[2]*dxdu[3] + 10*du[3]*dxdu[2] + 5*du[4]*dxdu[1] - dxdu[5]
6: 90*du[2]**2*dxdu[2] + 120*du[2]*du[3]*dxdu[1] - 15*du[2]*dxdu[4] - 20*du[3]*dxdu[3] - 15*du[4]*dxdu[2] - 6*du[5]*dxdu[1] + dxdu[6]


In [None]:
# So not only are central moments more stable, the resulting expressions
# are also much simpler!

# taking advantage of xarray

In [71]:
# Make a dataset holding two measurements: wrap each x array as a named
# variable ('a' from x, 'b' from xb) and merge them.
# (`xb` is the second sample set defined with the test data; the previous
# spelling `xB` was undefined and raised NameError.)
x_set = xr.merge(
    core.xrwrap_xv(xx, name=name) for xx, name in zip([x, xb], ['a', 'b'])
)

# data objects for the merged dataset, raw and central moments
data_set = core.factory_data(u, x_set, order)
data_set_c = core.factory_data(u, x_set, order, central=True)


# need "other" x on its own for comparison against variable 'b'
datab = core.factory_data(u, xb, order)
datab_c = core.factory_data(u, xb, order, central=True)

In [113]:
# data_set carries both measurements at once: its raw values (xv) and its
# moments (xu) are Datasets with one variable per measurement ('a' and 'b').
print(data_set.xv)
print(data_set.xu)

<xarray.Dataset>
Dimensions:  (rec: 100000, val: 5)
Dimensions without coordinates: rec, val
Data variables:
    a        (rec, val) float64 -0.4838 1.288 -0.1299 ... 0.5532 1.626 -0.327
    b        (rec, val) float64 0.7908 0.3241 -0.2293 ... -2.26 0.5923 1.667
<xarray.Dataset>
Dimensions:  (moment: 7, val: 5)
Dimensions without coordinates: moment, val
Data variables:
    a        (moment, val) float64 -5.266e-06 0.003392 ... 0.3043 -0.2864
    b        (moment, val) float64 -0.003121 0.002491 ... -0.2059 -0.2101


In [114]:
# Legacy reference coefficients for the second measurement `xb`
# (same u samples), one per order 0..order.
ufuncb, xufuncb = thermoextrap.buildAvgFuncs(xb, u, order)
coefs_listb = [deriv(ufuncb, xufuncb) for deriv in fs[:order + 1]]

In [115]:
# xtrapy coefficients for `xb` must equal the legacy values computed above
np.testing.assert_allclose(s.xcoefs(datab,  norm=False), coefs_listb)

In [119]:
# Same coefficient builder applied to the two-variable data object;
# the result is displayed below.
c_set = s.xcoefs(data_set, norm=False)
c_set

In [122]:
# Each variable of the dataset result must match the coefficients computed
# from the corresponding single-array data object.
cb = s.xcoefs(datab, norm=False)
np.testing.assert_allclose(c_set.a, c)
np.testing.assert_allclose(c_set.b, cb)

In [135]:
# central-moment coefficients for the two-variable data object
c_set_c = s_c.xcoefs(data_set_c, norm=False)
# raw and central results must agree variable by variable
xr.testing.assert_allclose(c_set, c_set_c)

# resampling

In [124]:
# Resample the data object (first argument is nrep, here 10) and show the
# `u` attribute of the resampled object.
data.resample(10).u

In [125]:
# testing resampling:
# draw one set of resampling indices (10 replicates) and reuse it for every
# data object below, so that their resampled results are directly comparable
idx = core.resample_indicies(len(data), nrep=10)

In [127]:
# Coefficients from resampled data, all using the shared `idx`.
# nrep is passed as None because explicit indices are supplied instead.
c_r = s.xcoefs(data.resample(nrep=None, idx=idx))
cb_r = s.xcoefs(datab.resample(None, idx))
cs_r = s.xcoefs(data_set.resample(None, idx))

In [134]:
# With identical indices, each dataset variable must reproduce the
# corresponding single-array resampled coefficients.
np.testing.assert_allclose(cs_r.a, c_r)
np.testing.assert_allclose(cs_r.b, cb_r)

In [138]:
# central-moment version, resampled with the same `idx`
cs_c_r = s_c.xcoefs(data_set_c.resample(None, idx))

In [142]:
# raw and central resampled coefficients must agree
xr.testing.assert_allclose(cs_r, cs_c_r)

# resampling time and chunking

In [175]:
# xarray can be backed by dask arrays.  I'm no expert, but this can speed things up.
# Use chunking to create dask arrays of uv, xv.

In [159]:
# Shared resampling indices (100 replicates) for the timing cells below.
# Sized from `data`, which already exists at this point; `a` is only created
# in the next cell, so `len(a)` failed on a fresh top-to-bottom run.
idx = core.resample_indicies(len(data), nrep=100)

In [165]:
# Two data objects from the same samples for the timing comparison.
# NOTE(review): the 4th positional argument (False) is not named here --
# confirm against core.factory_data which option it sets.
a = core.factory_data(u, x, order, False)
# chunk
b = core.factory_data(u, x, order, chunk=1000)

In [166]:
%%timeit -n 1 -r 1
# time resampling of the unchunked object `a` with the shared indices,
# then accessing its `u` attribute
aa = a.resample(None, idx)
aa.u

4.72 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [167]:
%%timeit -n 1 -r 1
# time resampling of the chunked object `b` with the shared indices,
# accessing both `xu` and `u`
bb = b.resample(None, idx)
bb.xu
bb.u

1.83 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [168]:
# Same comparison for the two-variable dataset `x_set`
# (this rebinds `a` and `b` from the previous timing cells).
# NOTE(review): the 4th positional argument (False) is not named here --
# confirm against core.factory_data which option it sets.
a = core.factory_data(u, x_set, order, False)
# chunk
b = core.factory_data(u, x_set, order, chunk=1000)

In [171]:
%%timeit -n 1 -r 1
# time resampling of the unchunked dataset object `a`
aa = a.resample(None, idx)
aa.u

7.62 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [172]:
%%timeit -n 1 -r 1
# time resampling of the chunked dataset object `b`
bb = b.resample(None, idx)
bb.xu
bb.u

2.99 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [174]:
# Note that you can also chunk on resampling.
# This is sometimes faster, because reindexing with idx
# can be slow on an already-chunked object.

In [173]:
%%timeit -n 1 -r 1
# time resampling of the unchunked object `a`, requesting chunk=1000
# in the resample call itself
aa = a.resample(None, idx, chunk=1000)
aa.u

1.63 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


# Extrap model

In [184]:
# development aid: re-import core so that edits to the module are picked up
reload(core)

<module 'thermoextrap.xtrapy.core' from '/Users/wpk/Documents/python/projects/xtrapy/thermoextrap/xtrapy/core.py'>

In [180]:
# test Extrap model
# beta values at which predictions are requested
betas = [0.1, 0.2]

# reference beta passed to the models below
ref_beta = 0.0

In [177]:
# blank legacy model: only the maximum order is given, data is supplied
# afterwards through train()
em = thermoextrap.ExtrapModel(maxOrder=order)

In [181]:
# train the legacy model on the x/u samples at the reference beta
params = em.train(ref_beta, xData=x, uData=u, saveParams=True)

In [182]:
# legacy prediction at `betas`, with order=3 instead of the model's maximum
em.predict(betas, order=3)

array([[8.78568874e-05, 3.31082730e-03, 2.46782394e-03, 3.25032496e-03,
        5.29429910e-03],
       [1.12840011e-04, 3.28106719e-03, 2.11578456e-03, 3.05901939e-03,
        5.58962669e-03]])

In [185]:
# xtrapy model built from a data object.
# Note: this rebinds `data` (previously created with chunk=1000) to an
# object created without the chunk argument.
data = core.factory_data(u, x, order)
# second argument is the reference beta (same value as ref_beta)
xem  = core.ExtrapModel(order, 0.0, data)

In [186]:
# xtrapy prediction with order=3, to compare with the legacy output above
xem.predict(betas, order=3)

In [187]:
# legacy prediction without an explicit order argument
em.predict(betas)

array([[8.78665315e-05, 3.31090066e-03, 2.46791014e-03, 3.25035385e-03,
        5.29427738e-03],
       [1.12869140e-04, 3.28227295e-03, 2.11718921e-03, 3.05951483e-03,
        5.58926355e-03]])

In [190]:
# xtrapy prediction without an explicit order argument, for comparison
xem.predict(betas)

In [194]:
# Central-moment model, built directly from the sample arrays with the
# from_values constructor (no separate data object needed).
xem_c = core.ExtrapModel.from_values(order, ref_beta, u, x, central=True)
xem_c.predict(betas)

In [195]:
# legacy bootstrap at `betas` with n=20; its output is compared with the
# std over 'rep' of the xtrapy resampled predictions in the next cells
em.bootstrap(betas, n=20)

array([[0.00293591, 0.00292712, 0.00268771, 0.00349113, 0.00376354],
       [0.00291293, 0.00292223, 0.00281441, 0.00360803, 0.00398392]])

In [200]:
# xtrapy equivalent: resample the model 20 times, predict, and take the
# standard deviation over the 'rep' dimension
xem_r = xem.resample(20)
xem_r.predict(betas).std('rep')

In [203]:
# same resample / predict / std chain for the central-moment model
xem_c.resample(20).predict(betas).std('rep')

In [202]:
# Note that xem_r is just an Extrap model holding new, resampled data;
# its data object can be inspected like any other.
xem_r.data.u

In [206]:
# legacy model for the second measurement `xb`, constructed with its data
# directly (order, reference beta, x data, u data)
emb = thermoextrap.ExtrapModel(order, ref_beta, xb, u)

In [216]:
# xtrapy central-moment models: one for `xb` alone and one for the
# two-variable dataset `x_set`
xemb = core.ExtrapModel.from_values(order, ref_beta, u, xb, central=True)
xem_set = core.ExtrapModel.from_values(order, ref_beta, u, x_set, central=True)

In [217]:
# legacy prediction for `xb`
emb.predict(betas)

array([[-0.00287918,  0.00277187, -0.00130362, -0.00084667,  0.00092772],
       [-0.00262195,  0.00301501, -0.00099706, -0.00029919,  0.00072324]])

In [218]:
# xtrapy prediction for `xb`, to compare with the legacy output above
xemb.predict(betas)

In [221]:
# The dataset model must reproduce the single-measurement predictions:
# variable 'a' corresponds to x (xem) and variable 'b' to xb (xemb).
out = xem.predict(betas)
outb = xemb.predict(betas)
outs = xem_set.predict(betas)

np.testing.assert_allclose(out, outs.a)
np.testing.assert_allclose(outb, outs.b)

In [223]:
# Resample all three models with one shared set of indices (20 replicates)
# so that their predictions can be compared replicate by replicate.
idx = core.resample_indicies(len(x), 20)

# nrep is None because explicit indices are supplied
out = xem.resample(None, idx).predict(betas)
outb = xemb.resample(None, idx).predict(betas)
outs = xem_set.resample(None, idx).predict(betas)

In [224]:
# resampled dataset predictions must match the single-measurement ones
np.testing.assert_allclose(out, outs.a)
np.testing.assert_allclose(outb, outs.b)

In [None]:
# Resampling is much faster with chunking (compare the two timings below).

In [228]:
%%timeit -n 1 -r 1
# legacy bootstrap timing (number of resamples left at bootstrap's default)
em.bootstrap(betas)

5.13 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [229]:
%%timeit -n 1 -r 1
# xtrapy timing: 20 resamples with chunk=1000, then predict
xem.resample(20, chunk=1000).predict(betas)

569 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


# ExtrapWeighted

In [265]:
# New synthetic data for the two-state models. The leading axis of size 2
# indexes the reference states (it is zipped against `beta0` below).
# Note: this rebinds u, x, xb and lowers `order`; cells above this point
# would see different data if re-run afterwards.
u = np.random.randn(2, n) + 5
x = np.random.randn(2, n, nv) + 10
# use nv (not a hard-coded 5) so both measurements keep the same number of values
xb = np.random.randn(2, n, nv) + 2
order = 4

In [266]:
# reference beta of each of the two states
beta0 = [0.0, 0.5]
# beta values at which predictions are requested
betas = [0.1, 0.2, 0.3, 0.4]

In [267]:
# legacy weighted models, one per measurement (x and xb), sharing the same u
emw = thermoextrap.ExtrapWeightedModel(order, beta0, x, u)
emwB = thermoextrap.ExtrapWeightedModel(order, beta0, xb, u)

In [281]:
# for xtrapy, create a list of models, one for each state:

# first create one dataset per state, holding both measurements
# ('a' from x, 'b' from xb)
xs = [xr.Dataset({'a': core.xrwrap_xv(xx), 
                  'b': core.xrwrap_xv(xxb)})
      for xx, xxb in zip(x, xb)
     ]

# now create the list of central-moment models, pairing each state's
# reference beta with its u samples and its dataset
states = [core.ExtrapModel.from_values(order, b0, uu, xx, central=True)
         for b0, uu, xx in zip(beta0, u, xs)]

# weighted model built from the per-state models
xemw = core.ExtrapWeightedModel(states)

In [282]:
# each variable of the xtrapy weighted prediction must match the legacy
# model built for the corresponding measurement
np.testing.assert_allclose(emw.predict(betas), xemw.predict(betas).a)
np.testing.assert_allclose(emwB.predict(betas), xemw.predict(betas).b)

In [315]:
# legacy bootstrap for the weighted model, n=20
emw.bootstrap(betas, n=20)

array([[0.00317704, 0.00275958, 0.00374537, 0.00299037, 0.00219738],
       [0.00316859, 0.00282162, 0.00381763, 0.00301853, 0.00226352],
       [0.00358896, 0.00260602, 0.00282799, 0.002793  , 0.00380617],
       [0.00350657, 0.00256567, 0.00270791, 0.00282365, 0.00370909]])

In [316]:
# xtrapy counterpart: 20 resamples (chunk=1000), predict, std over 'rep',
# then select variable 'a' to compare with the legacy output above
xemw.resample(20, chunk=1000).predict(betas).std('rep').a

In [290]:
%%timeit -n 1 -r 1
# legacy weighted-model bootstrap timing, n=100
emw.bootstrap(betas, n=100)

6.42 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [291]:
%%timeit -n 1 -r 1
# note that this is actually doing twice the work of the legacy call,
# because the data includes both x and xb
xemw.resample(100).predict(betas).std('rep')

6.54 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [322]:
%%timeit -n 1 -r 1
# note that is is actually doing twice the work
# because data includes both x and xb
xemw_a.resample(100).predict(betas).std('rep')

3.08 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [323]:
%%timeit -n 1 -r 1
# note that this is actually doing twice the work of the legacy call,
# because the data includes both x and xb; here with chunk=1000 on resampling
xemw.resample(100, chunk=1000).predict(betas).std('rep')

5.89 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


# InterpModel

In [333]:
# legacy interpolation models, one per measurement (x and xb), sharing u
emi = thermoextrap.InterpModel(order, beta0, x, u)
emib = thermoextrap.InterpModel(order, beta0, xb, u)

In [334]:
# xtrapy interpolation model, reusing the per-state models in `states`
# (which hold both measurements 'a' and 'b')
xemi = core.InterpModel(states)

In [335]:
# Per-measurement state lists: one central-moment model per reference state,
# each built from a single measurement array (x for "a", xb for "b").
# `states_a` was previously used without being defined (NameError).
states_a = [core.ExtrapModel.from_values(order, b0, uu, xx, central=True)
            for b0, uu, xx in zip(beta0, u, x)]
states_b = [core.ExtrapModel.from_values(order, b0, uu, xx, central=True)
            for b0, uu, xx in zip(beta0, u, xb)]

# single-measurement interpolation models
xemi_a = core.InterpModel(states_a)
xemi_b = core.InterpModel(states_b)


In [336]:
# Each variable of the xtrapy interpolation must match the legacy model for
# the corresponding measurement. Evaluated at `betas`, the same points used
# by the bootstrap cells below; the previous name `alpha` was never defined.
np.testing.assert_allclose(emi.predict(betas), xemi.predict(betas).a)
np.testing.assert_allclose(emib.predict(betas), xemi.predict(betas).b)

In [342]:
# legacy bootstrap for the interpolation model, n=20
emi.bootstrap(betas, n=20)

array([[0.00308216, 0.00324151, 0.00310084, 0.00375802, 0.00320159],
       [0.00254513, 0.00241886, 0.00227406, 0.00338288, 0.00256168],
       [0.00274609, 0.00241118, 0.00170774, 0.0031182 , 0.00239175],
       [0.0033331 , 0.00310371, 0.00223361, 0.00322888, 0.00289765]])

In [343]:
# xtrapy counterpart: 20 resamples, predict, std over 'rep', variable 'a'
xemi.resample(20).predict(betas).std('rep').a

In [341]:
%%timeit -n 1 -r 1
# legacy interpolation bootstrap timing, n=20
emi.bootstrap(betas, n=20)

1.38 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [340]:
%%timeit -n 1 -r 1
# xtrapy interpolation resampling timing, 20 resamples
# (the model holds both measurements; only 'a' is selected at the end)
xemi.resample(20).predict(betas).std('rep').a

1.16 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


# MBAR

# Perturb

In [344]:
# development aid: re-import core so that edits to the module are picked up
reload(core)

<module 'thermoextrap.xtrapy.core' from '/Users/wpk/Documents/python/projects/xtrapy/thermoextrap/xtrapy/core.py'>

In [353]:
# reference beta for the perturbation models below
beta_ref = 0.5

In [354]:
# legacy perturbation model from the first state's samples (x[0], u[0]).
# NOTE(review): x[0]/u[0] were paired with beta0[0] = 0.0 in the weighted
# and interpolation cells, but beta_ref is 0.5 here -- harmless for synthetic
# data, but confirm that this is intended.
pm = thermoextrap.PerturbModel(beta_ref, x[0], u[0])

In [355]:
# legacy perturbation prediction at two betas, with the MBAR path disabled
pm.predict([0.1, 0.2], useMBAR=False)

array([[ 9.99896592, 10.00026166, 10.00013016,  9.99888289, 10.00145388],
       [ 9.9988368 , 10.00063401, 10.00015292,  9.99878397, 10.00098615]])

In [356]:
# xtrapy perturbation model from the same samples; note the argument order
# (u before x) differs from the legacy constructor
xpm = core.PerturbModel.from_values(beta_ref, u[0], x[0])

In [357]:
# xtrapy perturbation prediction at the same two betas, for comparison
xpm.predict([0.1, 0.2])