In [2]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from thermoextrap import *
import thermoextrap


In [3]:
# xtrapy stuff:
# note, just using single module right now.  Can retinker
import thermoextrap.xtrapy.core as core
import xarray as xr

In [4]:
from importlib import reload

In [6]:
order = 6
fs = [thermoextrap.symDerivAvgX(i) for i in range(order+1)]

In [733]:
# Synthetic test data
n = 10 ** 5  # number of samples
nv = 5       # number of x values per sample

# constant shifts added to the uniform random draws
_u_offset, _x_offset = 0.0, 0.0

# fixed seed; the draws below must stay in this order (u, x, ub, xb)
# so that the generated arrays are reproducible
np.random.seed(0)
u = _u_offset + np.random.rand(n)
x = _x_offset + np.random.rand(n, nv)

# second, independent set used for testing
ub = _u_offset + np.random.rand(n)
xb = _x_offset + np.random.rand(n, nv)

# comparison of xtrapy

## Data averages

In [734]:
# Test "coefs"
ufunc, xufunc = thermoextrap.buildAvgFuncs(x, u, order)
coefs_list = [fs[i](ufunc, xufunc) for i in range(order+1)]

In [735]:
# coefficient class handler
# raw moments

# coefficient builder
s = core.factory_coefs()
# data object
data = core.factory_data(u, x, order=order, chunk=1000)

In [736]:
# Note very slight difference in averages.
# this is due to numerics
print(data.u - [ufunc(i) for i in range(order+1)])
print(data.xu - [xufunc(i) for i in range(order+1)])

<xarray.DataArray 'u' (moment: 5)>
array([0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       5.55111512e-17])
Dimensions without coordinates: moment
<xarray.DataArray 'x' (moment: 5, val: 5)>
array([[ 1.11022302e-15,  6.66133815e-16, -3.33066907e-16,
         6.77236045e-15, -4.77395901e-15],
       [-1.49880108e-15, -3.88578059e-16,  0.00000000e+00,
        -5.55111512e-17, -1.27675648e-15],
       [ 2.77555756e-17, -1.99840144e-15,  1.44328993e-15,
         1.05471187e-15,  2.49800181e-16],
       [ 4.71844785e-16,  9.43689571e-16,  1.44328993e-15,
         1.80411242e-16, -7.21644966e-16],
       [ 1.66533454e-15, -1.04083409e-15,  1.33226763e-15,
        -7.63278329e-16,  1.98452366e-15]])
Dimensions without coordinates: moment, val


## coefficients of expansion

In [737]:
# norm = True, include 1/ n!  factor
# if want straight coeffs, use norm=False
c = s.xcoefs(data, norm=False) 
c

In [738]:
# test equal to old way
np.testing.assert_allclose(c, coefs_list, rtol=1e-5)

In [739]:
# instead, can use central moments
s_c = core.factory_coefs(central=True)
data_c = core.factory_data(u, x, order=order, central=True)

c_c = s_c.xcoefs(data_c, norm=False)
c_c

In [746]:
# test close: coefficients from raw moments and from central moments must agree.
# The central-moment coefficient builder is `s_c` (created together with `data_c`).
np.testing.assert_allclose(s.xcoefs(data), s_c.xcoefs(data_c))

# nice-ness of central moments

In [747]:
# coeffs using raw moments u[i], xu[i]
for i in range(order+1):
    print('{}: {}'.format(i, s.exprs[i]))

0: xu[0]
1: u[1]*xu[0] - xu[1]
2: 2*u[1]**2*xu[0] - 2*u[1]*xu[1] - u[2]*xu[0] + xu[2]
3: 6*u[1]**3*xu[0] - 6*u[1]**2*xu[1] - 6*u[1]*u[2]*xu[0] + 3*u[1]*xu[2] + 3*u[2]*xu[1] + u[3]*xu[0] - xu[3]
4: 24*u[1]**4*xu[0] - 24*u[1]**3*xu[1] - 36*u[1]**2*u[2]*xu[0] + 12*u[1]**2*xu[2] + 24*u[1]*u[2]*xu[1] + 8*u[1]*u[3]*xu[0] - 4*u[1]*xu[3] + 6*u[2]**2*xu[0] - 6*u[2]*xu[2] - 4*u[3]*xu[1] - u[4]*xu[0] + xu[4]


In [748]:
# coeffs using central moments du[i] = <(u-<u>)**i> 

for i in range(order+1):
    print('{}: {}'.format(i, s_c.exprs[i]))

0: x1
1: -dxdu[1]
2: dxdu[2]
3: 3*du[2]*dxdu[1] - dxdu[3]
4: -6*du[2]*dxdu[2] - 4*du[3]*dxdu[1] + dxdu[4]


In [749]:
# so not only are central moments more stable, the resulting expressions
# are way simpler!

# taking advantage of xarray

In [754]:
# make a dataset holding two observables: 'a' wraps x, 'b' wraps xb
x_set = xr.merge(
    [core.xrwrap_xv(xx, name=name) for xx, name in zip([x, xb], ['a', 'b'])]
)

# data objects (raw and central moments) built on the combined dataset
data_set = core.factory_data(u, x_set, order=order)
data_set_c = core.factory_data(u, x_set, order=order, central=True)


# need "other" x for comparison
datab = core.factory_data(u, xb, order=order)
datab_c = core.factory_data(u, xb, order=order, central=True)

In [755]:
# note that data_set has multiple measurements in it.  Very nice...
print(data_set.xv)
print(data_set.xu)

<xarray.Dataset>
Dimensions:  (rec: 100000, val: 5)
Dimensions without coordinates: rec, val
Data variables:
    a        (rec, val) float64 0.5353 0.904 0.5024 ... 0.7553 0.2138 0.7261
    b        (rec, val) float64 0.6237 0.1782 0.2007 ... 0.08856 0.0782 0.4235
<xarray.Dataset>
Dimensions:  (moment: 5, val: 5)
Dimensions without coordinates: moment, val
Data variables:
    a        (moment, val) float64 0.5004 0.499 0.5004 ... 0.1002 0.0998 0.1003
    b        (moment, val) float64 0.5001 0.5015 0.5001 ... 0.1002 0.0998 0.1


In [756]:
ufuncb, xufuncb = thermoextrap.buildAvgFuncs(xb, u, order)
coefs_listb = [fs[i](ufuncb, xufuncb) for i in range(order+1)]

In [757]:
# make sure have same value
np.testing.assert_allclose(s.xcoefs(datab,  norm=False), coefs_listb)

In [758]:
# consider dataset
c_set = s.xcoefs(data_set, norm=False)
c_set

In [759]:
# testing
cb = s.xcoefs(datab, norm=False)
np.testing.assert_allclose(c_set.a, c)
np.testing.assert_allclose(c_set.b, cb)

In [760]:
# central
c_set_c = s_c.xcoefs(data_set_c, norm=False)
# test
xr.testing.assert_allclose(c_set, c_set_c)

# resampling

In [761]:
# resampling
data.resample(10).u

In [762]:
# testing resampling:
# use same resampler for all
idx = core.resample_indicies(len(data), nrep=10)

In [763]:
c_r = s.xcoefs(data.resample(nrep=None, idx=idx))
cb_r = s.xcoefs(datab.resample(None, idx))
cs_r = s.xcoefs(data_set.resample(None, idx))

In [764]:
np.testing.assert_allclose(cs_r.a, c_r)
np.testing.assert_allclose(cs_r.b, cb_r)

In [765]:
# center
cs_c_r = s_c.xcoefs(data_set_c.resample(None, idx))

In [766]:
xr.testing.assert_allclose(cs_r, cs_c_r)

# resampling time and chunking

In [767]:
# xarray can be backed by dask.  I'm no expert, but this may speed things up.
# Use chunking to create dask arrays for uv, xv

In [768]:
# The number of records is taken from `data`; the object `a` is only created
# in the following cell, so it cannot be used here on a fresh kernel.
idx = core.resample_indicies(len(data), nrep=100)

In [769]:
# Same u/x data twice: `a` is built without the chunk argument, `b` with chunk=1000.
# NOTE(review): the 4th positional argument (False) is presumably `central` --
# TODO confirm against core.factory_data and pass it by keyword.
a = core.factory_data(u, x, order, False)
# chunk
b = core.factory_data(u, x, order, chunk=1000)

In [770]:
%%timeit -n 1 -r 1
aa = a.resample(None, idx)
aa.u

6.05 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [771]:
%%timeit -n 1 -r 1
bb = b.resample(None, idx)
bb.xu
bb.u

89.5 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [772]:
a = core.factory_data(u, x_set, order, False)
# chunk
b = core.factory_data(u, x_set, order, chunk=1000)

In [773]:
%%timeit -n 1 -r 1
aa = a.resample(None, idx)
aa.u

10.5 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [774]:
%%timeit -n 1 -r 1
bb = b.resample(None, idx)
bb.xu
bb.u

84.4 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [775]:
# note that you can also chunk on resampling.
# This is sometimes faster, because reindexing with idx
# can be slow on a chunked object

In [776]:
%%timeit -n 1 -r 1
aa = a.resample(None, idx, chunk=1000)
aa.u

59 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


# Extrap model

In [777]:
reload(core)

<module 'thermoextrap.xtrapy.core' from '/Users/wpk/Documents/python/projects/xtrapy/thermoextrap/xtrapy/core.py'>

In [778]:
# test Extrap model
betas = [0.1, 0.2]
ref_beta = 0.0

In [779]:
# blank model
em = thermoextrap.ExtrapModel(maxOrder=order)

In [780]:
params = em.train(ref_beta, xData=x, uData=u, saveParams=True)

In [781]:
em.predict(betas, order=3)

array([[0.50039714, 0.49900707, 0.50036143, 0.4991252 , 0.501141  ],
       [0.50038364, 0.49901888, 0.50034093, 0.49912376, 0.50113048]])

In [782]:
data = core.factory_data(u, x, order)
xem  = core.ExtrapModel(order, 0.0, data)

In [783]:
xem.predict(betas, order=3)

In [784]:
em.predict(betas)

array([[0.50039714, 0.49900707, 0.50036143, 0.4991252 , 0.501141  ],
       [0.50038364, 0.49901888, 0.50034093, 0.49912376, 0.50113048]])

In [785]:
xem.predict(betas)

In [786]:
# central, and build from "from_values" method
xem_c = core.ExtrapModel.from_values(order, ref_beta, u, x, central=True)
xem_c.predict(betas)

In [787]:
# resample
em.bootstrap(betas, n=20)

array([[0.00111307, 0.00079143, 0.00101329, 0.00103734, 0.00074581],
       [0.00111701, 0.00079355, 0.00103067, 0.00104237, 0.00074025]])

In [788]:
# resample data
xem_r = xem.resample(20)
xem_r.predict(betas).std('rep')

In [789]:
xem_c.resample(20).predict(betas).std('rep')

In [790]:
# note that xem_r is just an Extrap model with new resampled data
xem_r.data.u

In [791]:
# testing dataset
emb = thermoextrap.ExtrapModel(order, ref_beta, xb, u)

In [792]:
xemb = core.ExtrapModel.from_values(order, ref_beta, u, xb, central=True)
xem_set = core.ExtrapModel.from_values(order, ref_beta, u, x_set, central=True)

In [793]:
emb.predict(betas)

array([[0.50009022, 0.50147635, 0.50003481, 0.50009276, 0.50039032],
       [0.50005316, 0.50150067, 0.50000879, 0.50010606, 0.50040264]])

In [794]:
xemb.predict(betas)

In [795]:
# testing dataset
out = xem.predict(betas)
outb = xemb.predict(betas)
outs = xem_set.predict(betas)

np.testing.assert_allclose(out, outs.a)
np.testing.assert_allclose(outb, outs.b)

In [796]:
# resampling
idx = core.resample_indicies(len(x), 20)

out = xem.resample(None, idx).predict(betas)
outb = xemb.resample(None, idx).predict(betas)
outs = xem_set.resample(None, idx).predict(betas)

In [797]:
np.testing.assert_allclose(out, outs.a)
np.testing.assert_allclose(outb, outs.b)

In [798]:
# resampling much faster with chunking

In [799]:
%%timeit -n 1 -r 1
em.bootstrap(betas)

2.84 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [800]:
%%timeit -n 1 -r 1
xem.resample(20, chunk=1000).predict(betas)

534 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


# ExtrapWeighted

In [801]:
# Data for the weighted model: the leading axis of size 2 holds one entry per state.
# NOTE: this rebinds u, x, xb and order that were used by the earlier sections.
u = np.random.rand(2, n) + 5
x = np.random.rand(2, n, nv) + 10
# use nv rather than a hard-coded 5 so that xb always has the same shape as x
xb = np.random.rand(2, n, nv) + 2
order = 4

In [802]:
beta0 = [0.05, 0.5]
betas = [0.1, 0.2, 0.3, 0.4]

In [803]:
emw = thermoextrap.ExtrapWeightedModel(order, beta0, x, u)
emwB = thermoextrap.ExtrapWeightedModel(order, beta0, xb, u)

In [804]:
# xtrapy: build one ExtrapModel per state, then combine them

# one Dataset per state, holding both observables ('a' from x, 'b' from xb)
xs = [
    xr.Dataset({'a': core.xrwrap_xv(x_state), 'b': core.xrwrap_xv(xb_state)})
    for x_state, xb_state in zip(x, xb)
]

# now create the list of models, one per (reference beta, u, dataset) triple
states = [
    core.ExtrapModel.from_values(order, beta_state, u_state, xs_state, central=True)
    for beta_state, u_state, xs_state in zip(beta0, u, xs)
]

xemw = core.ExtrapWeightedModel(states)

In [805]:
# per-state models built from x only, to compare against the dataset-based xemw
states_a = [
    core.ExtrapModel.from_values(order, beta_state, u_state, x_state, central=True)
    for beta_state, u_state, x_state in zip(beta0, u, x)
]
xemw_a = core.ExtrapWeightedModel(states_a)

# one set of resampling indices per state (two states)
idxs = [core.resample_indicies(len(xemw[0].data), 20) for _ in range(2)]

# resample both weighted models with the same indices
# NOTE(review): a and b are computed here but not compared in this cell
a = xemw.resample(None, idxs).predict(betas).a
b = xemw_a.resample(None, idxs).predict(betas)

In [505]:
np.testing.assert_allclose(emw.predict(betas), xemw.predict(betas).a)
np.testing.assert_allclose(emwB.predict(betas), xemw.predict(betas).b)

In [471]:
emw.bootstrap(betas, n=20)

array([[0.00301384, 0.00346906, 0.00351224, 0.00334989, 0.00269749],
       [0.00311001, 0.0035105 , 0.00346592, 0.00335995, 0.00271277],
       [0.00217716, 0.00357405, 0.0038071 , 0.00301555, 0.00313651],
       [0.00213131, 0.00346612, 0.00363221, 0.00292654, 0.00318416]])

In [472]:
xemw.resample(20, chunk=1000).predict(betas).std('rep').a

In [473]:
%%timeit -n 1 -r 1
emw.bootstrap(betas, n=100)

6.7 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [489]:
%%timeit -n 1 -r 1
# note that is is actually doing twice the work
# because data includes both x and xb
xemw.resample(100).predict(betas).std('rep')

5.33 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [476]:
%%timeit -n 1 -r 1
# note that is is actually doing twice the work
# because data includes both x and xb
xemw.resample(100, chunk=1000).predict(betas).std('rep')

5.27 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [492]:
%%timeit -n 1 -r 1
# note that is is actually doing twice the work
# because data includes both x and xb
xemw_a.resample(500).predict(betas).std('rep')

24.1 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [494]:
from dask.diagnostics import ProgressBar

In [495]:
%%timeit -n 1 -r 1
# note that is is actually doing twice the work
# because data includes both x and xb
with ProgressBar():
    xemw_a.resample(500, chunk=10000).predict(betas).std('rep')

[########################################] | 100% Completed |  0.1s
[########################################] | 100% Completed |  0.5s
[########################################] | 100% Completed |  2.2s
[########################################] | 100% Completed |  0.1s
[########################################] | 100% Completed |  0.4s
[########################################] | 100% Completed |  2.2s
17.4 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


# InterpModel

In [333]:
emi = thermoextrap.InterpModel(order, beta0, x, u)
emib = thermoextrap.InterpModel(order, beta0, xb, u)

In [334]:
xemi = core.InterpModel(states)

In [None]:
# NOTE(review): leftover scratch cell that was never executed (In [None]).
# xr.dot is called here without any array arguments, which presumably fails
# on Run All -- confirm and remove this cell.
xr.dot()

In [365]:
xr.concat([m.data.uv for m in xemi], dim='state')

In [335]:
states_b = [core.ExtrapModel.from_values(order, b0, uu, xx, central=True)
             for b0, uu, xx in zip(beta0, u, xb)]

xemi_a = core.InterpModel(states_a)
xemi_b = core.InterpModel(states_b)


In [336]:
# compare the old and new interpolation models at the prediction points `betas`
# (`alpha` is not defined in the visible notebook; the neighbouring cells use `betas`)
np.testing.assert_allclose(emi.predict(betas), xemi.predict(betas).a)
np.testing.assert_allclose(emib.predict(betas), xemi.predict(betas).b)

In [342]:
emi.bootstrap(betas, n=20)

array([[0.00308216, 0.00324151, 0.00310084, 0.00375802, 0.00320159],
       [0.00254513, 0.00241886, 0.00227406, 0.00338288, 0.00256168],
       [0.00274609, 0.00241118, 0.00170774, 0.0031182 , 0.00239175],
       [0.0033331 , 0.00310371, 0.00223361, 0.00322888, 0.00289765]])

In [343]:
xemi.resample(20).predict(betas).std('rep').a

In [341]:
%%timeit -n 1 -r 1
emi.bootstrap(betas, n=20)

1.38 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [340]:
%%timeit -n 1 -r 1
xemi.resample(20).predict(betas).std('rep').a

1.16 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


# MBAR

In [497]:
mbar = thermoextrap.MBARModel(refB=beta0, xData=x, uData=u)

In [510]:
mbar.predict(betas)

array([[ 9.99983657,  9.99680049,  9.99637782,  9.99784935, 10.0012848 ],
       [ 9.99969283,  9.99741583,  9.99637626,  9.99771245, 10.00125331],
       [ 9.99950523,  9.99800791,  9.996365  ,  9.99759615, 10.00127627],
       [ 9.99926941,  9.99858284,  9.99634027,  9.9974926 , 10.00135092]])

In [568]:
reload(core)

<module 'thermoextrap.xtrapy.core' from '/Users/wpk/Documents/python/projects/xtrapy/thermoextrap/xtrapy/core.py'>

In [569]:
xmbar = core.MBARModel(states_a)

In [571]:
out = xmbar.predict(betas)

In [575]:
out

In [574]:
np.testing.assert_allclose(mbar.predict(betas), xmbar.predict(betas))

In [578]:
import os

In [579]:
os.getpid()

2138

# Perturb

In [344]:
reload(core)

<module 'thermoextrap.xtrapy.core' from '/Users/wpk/Documents/python/projects/xtrapy/thermoextrap/xtrapy/core.py'>

In [353]:
beta_ref = 0.5

In [354]:
pm = thermoextrap.PerturbModel(beta_ref, x[0], u[0])

In [355]:
pm.predict([0.1, 0.2], useMBAR=False)

array([[ 9.99896592, 10.00026166, 10.00013016,  9.99888289, 10.00145388],
       [ 9.9988368 , 10.00063401, 10.00015292,  9.99878397, 10.00098615]])

In [356]:
xpm = core.PerturbModel.from_values(beta_ref, u[0], x[0])

In [357]:
xpm.predict([0.1, 0.2])

# log func

In [1084]:
reload(core)

<module 'thermoextrap.xtrapy.core' from '/Users/wpk/Documents/python/projects/xtrapy/thermoextrap/xtrapy/core.py'>

In [1088]:
tt = core._SubsCentralMoments()

In [1090]:
s = core.factory_coefs(central=True)

In [1093]:
s.exprs[4]

-6*du[2]*dxdu[2] - 4*du[3]*dxdu[1] + dxdu[4]

In [1089]:
tt[2]

{u[0]: 1,
 xu[0]: x1,
 u[1]: u1,
 xu[1]: u1*x1 + dxdu[1],
 u[2]: u1**2 + du[2],
 xu[2]: u1**2*x1 + 2*u1*dxdu[1] + x1*du[2] + dxdu[2]}

In [1085]:
t = core._Central_u_dxdu()

In [1087]:
t[5]

u1**5 + 10*u1**3*du[2] + 10*u1**2*du[3] + 5*u1*du[4] + du[5]

In [1078]:
ss = core._SubsCentralMoments()

In [1079]:
ss[4]

{u[0]: 1,
 xu[0]: x1,
 u[1]: u1,
 xu[1]: u1*x1 + dxdu[1],
 u[2]: u1**2 + du[2],
 xu[2]: u1**2*x1 + 2*u1*dxdu[1] + x1*du[2] + dxdu[2],
 u[3]: u1**3 + 3*u1*du[2] + du[3],
 xu[3]: u1**3*x1 + 3*u1**2*dxdu[1] + 3*u1*x1*du[2] + 3*u1*dxdu[2] + x1*du[3] + dxdu[3],
 u[4]: u1**4 + 6*u1**2*du[2] + 4*u1*du[3] + du[4],
 xu[4]: u1**4*x1 + 4*u1**3*dxdu[1] + 6*u1**2*x1*du[2] + 6*u1**2*dxdu[2] + 4*u1*x1*du[3] + 4*u1*dxdu[3] + x1*du[4] + dxdu[4]}

In [1060]:
v = core._SymDeriv()
v2 = core._SymDeriv2()

s = core.factory_coefs()

s2 = core._Subs2()

In [1073]:
reload(core)

<module 'thermoextrap.xtrapy.core' from '/Users/wpk/Documents/python/projects/xtrapy/thermoextrap/xtrapy/core.py'>

In [1074]:
# beta

v2 = core._SymDeriv2()
s2 = core._Subsxbeta2()

In [1075]:
s = core.factory_coefs(xbeta=True)

In [1076]:
%%timeit -n 1 -r 1
s.exprs[6]

855 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [1077]:
%%timeit -n 1 -r 1
v2[6].subs(s2[6])

386 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [1064]:
s.exprs[3]

6*u[1]**3*xu[0, 0] - 6*u[1]**2*xu[0, 1] + 6*u[1]**2*xu[1, 0] - 6*u[1]*u[2]*xu[0, 0] + 3*u[1]*xu[0, 2] - 6*u[1]*xu[1, 1] + 3*u[1]*xu[2, 0] + 3*u[2]*xu[0, 1] - 3*u[2]*xu[1, 0] + u[3]*xu[0, 0] - xu[0, 3] + 3*xu[1, 2] - 3*xu[2, 1] + xu[3, 0]

In [1072]:
n = 6
(s.exprs[n] - v2[n].subs(s2[n])).simplify()

0

In [1058]:
s.exprs[6] - v2[6].subs(s2[6])

0

In [1053]:
%%timeit -n 1 -r 1
print(s.exprs[5])

120*u[1]**5*xu[0] - 120*u[1]**4*xu[1] - 240*u[1]**3*u[2]*xu[0] + 60*u[1]**3*xu[2] + 180*u[1]**2*u[2]*xu[1] + 60*u[1]**2*u[3]*xu[0] - 20*u[1]**2*xu[3] + 90*u[1]*u[2]**2*xu[0] - 60*u[1]*u[2]*xu[2] - 40*u[1]*u[3]*xu[1] - 10*u[1]*u[4]*xu[0] + 5*u[1]*xu[4] - 30*u[2]**2*xu[1] - 20*u[2]*u[3]*xu[0] + 10*u[2]*xu[3] + 10*u[3]*xu[2] + 5*u[4]*xu[1] + u[5]*xu[0] - xu[5]
178 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [1054]:
%%timeit -n 1 -r 1
v2[5].subs(s2[5])

16.7 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [1045]:
new_sub

[(f(b), z(b)*xu[0]),
 (Derivative(f(b), b), -z(b)*xu[1]),
 (Derivative(z(b), b), -z(b)*u[1]),
 (Derivative(f(b), (b, 2)), z(b)*xu[2]),
 (Derivative(z(b), (b, 2)), z(b)*u[2]),
 (Derivative(f(b), (b, 3)), -z(b)*xu[3]),
 (Derivative(z(b), (b, 3)), -z(b)*u[3]),
 (Derivative(f(b), (b, 4)), z(b)*xu[4]),
 (Derivative(z(b), (b, 4)), z(b)*u[4])]

In [1049]:
v2[5].subs(s2[5])

120*u[1]**5*xu[0] - 120*u[1]**4*xu[1] - 240*u[1]**3*u[2]*xu[0] + 60*u[1]**3*xu[2] + 180*u[1]**2*u[2]*xu[1] + 60*u[1]**2*u[3]*xu[0] - 20*u[1]**2*xu[3] + 90*u[1]*u[2]**2*xu[0] - 60*u[1]*u[2]*xu[2] - 40*u[1]*u[3]*xu[1] - 10*u[1]*u[4]*xu[0] + 5*u[1]*xu[4] - 30*u[2]**2*xu[1] - 20*u[2]*u[3]*xu[0] + 10*u[2]*xu[3] + 10*u[3]*xu[2] + 5*u[4]*xu[1] + u[5]*xu[0] - xu[5]

In [1050]:
s.exprs[5]

120*u[1]**5*xu[0] - 120*u[1]**4*xu[1] - 240*u[1]**3*u[2]*xu[0] + 60*u[1]**3*xu[2] + 180*u[1]**2*u[2]*xu[1] + 60*u[1]**2*u[3]*xu[0] - 20*u[1]**2*xu[3] + 90*u[1]*u[2]**2*xu[0] - 60*u[1]*u[2]*xu[2] - 40*u[1]*u[3]*xu[1] - 10*u[1]*u[4]*xu[0] + 5*u[1]*xu[4] - 30*u[2]**2*xu[1] - 20*u[2]*u[3]*xu[0] + 10*u[2]*xu[3] + 10*u[3]*xu[2] + 5*u[4]*xu[1] + u[5]*xu[0] - xu[5]

In [980]:
v2[5]

-f(b)*Derivative(z(b), (b, 5))/z(b)**2 + 10*f(b)*Derivative(z(b), b)*Derivative(z(b), (b, 4))/z(b)**3 + 20*f(b)*Derivative(z(b), (b, 2))*Derivative(z(b), (b, 3))/z(b)**3 - 60*f(b)*Derivative(z(b), b)**2*Derivative(z(b), (b, 3))/z(b)**4 - 90*f(b)*Derivative(z(b), b)*Derivative(z(b), (b, 2))**2/z(b)**4 + 240*f(b)*Derivative(z(b), b)**3*Derivative(z(b), (b, 2))/z(b)**5 - 120*f(b)*Derivative(z(b), b)**5/z(b)**6 + Derivative(f(b), (b, 5))/z(b) - 5*Derivative(f(b), b)*Derivative(z(b), (b, 4))/z(b)**2 - 10*Derivative(f(b), (b, 2))*Derivative(z(b), (b, 3))/z(b)**2 - 10*Derivative(f(b), (b, 3))*Derivative(z(b), (b, 2))/z(b)**2 - 5*Derivative(f(b), (b, 4))*Derivative(z(b), b)/z(b)**2 + 40*Derivative(f(b), b)*Derivative(z(b), b)*Derivative(z(b), (b, 3))/z(b)**3 + 30*Derivative(f(b), b)*Derivative(z(b), (b, 2))**2/z(b)**3 + 60*Derivative(f(b), (b, 2))*Derivative(z(b), b)*Derivative(z(b), (b, 2))/z(b)**3 + 20*Derivative(f(b), (b, 3))*Derivative(z(b), b)**2/z(b)**3 - 180*Derivative(f(b), b)*Derivati

In [1023]:
subs = core._Subs()

In [1024]:
subs[1]

{Derivative(f(b), b): -z(b)*xu[1], Derivative(z(b), b): -z(b)*u[1]}

In [1025]:
# flatten the per-order substitution mappings (orders 0..n) into a single
# list of (key, replacement) pairs, lowest order first
n = 4
new_subs = []
for i in range(n + 1):
    new_subs.extend(subs[i].items())

In [1027]:
new_subs

[(f(b), z(b)*xu[0]),
 (Derivative(f(b), b), -z(b)*xu[1]),
 (Derivative(z(b), b), -z(b)*u[1]),
 (Derivative(f(b), (b, 2)), z(b)*xu[2]),
 (Derivative(z(b), (b, 2)), z(b)*u[2]),
 (Derivative(f(b), (b, 3)), -z(b)*xu[3]),
 (Derivative(z(b), (b, 3)), -z(b)*u[3]),
 (Derivative(f(b), (b, 4)), z(b)*xu[4]),
 (Derivative(z(b), (b, 4)), z(b)*u[4])]

In [1031]:
s2._data

[(f(b), z(b)*xu[0]),
 (Derivative(f(b), b), -z(b)*xu[1]),
 (Derivative(z(b), b), -z(b)*u[1]),
 (Derivative(f(b), (b, 2)), z(b)*xu[2]),
 (Derivative(z(b), (b, 2)), z(b)*u[2]),
 (Derivative(f(b), (b, 3)), -z(b)*xu[3]),
 (Derivative(z(b), (b, 3)), -z(b)*u[3]),
 (Derivative(f(b), (b, 4)), z(b)*xu[4]),
 (Derivative(z(b), (b, 4)), z(b)*u[4]),
 (Derivative(f(b), (b, 5)), -z(b)*xu[5]),
 (Derivative(z(b), (b, 5)), -z(b)*u[5])]

In [1030]:
s2[1]

[(Derivative(f(b), b), -z(b)*xu[1]), (f(b), z(b)*xu[0])]

In [939]:
subs[0]

{f(b): z(b)*xu[0]}

In [948]:
subs[1]

{Derivative(f(b), b): -z(b)*xu[1], Derivative(z(b), b): -z(b)*u[1]}

In [949]:
new_subs.update(subs[1])

In [951]:
new_subs = {}
for i in range(n+1):
    new_subs.update(subs[i])
    

In [968]:
new_subs[-1::-1]

[(Derivative(z(b), (b, 4)), z(b)*u[4]),
 (Derivative(f(b), (b, 4)), z(b)*xu[4]),
 (Derivative(z(b), (b, 3)), -z(b)*u[3]),
 (Derivative(f(b), (b, 3)), -z(b)*xu[3]),
 (Derivative(z(b), (b, 2)), z(b)*u[2]),
 (Derivative(f(b), (b, 2)), z(b)*xu[2]),
 (Derivative(z(b), b), -z(b)*u[1]),
 (Derivative(f(b), b), -z(b)*xu[1]),
 (f(b), z(b)*xu[0])]

In [969]:
v[4].subs(new_subs[-1::-1])

24*u[1]**4*xu[0] - 24*u[1]**3*xu[1] - 36*u[1]**2*u[2]*xu[0] + 12*u[1]**2*xu[2] + 24*u[1]*u[2]*xu[1] + 8*u[1]*u[3]*xu[0] - 4*u[1]*xu[3] + 6*u[2]**2*xu[0] - 6*u[2]*xu[2] - 4*u[3]*xu[1] - u[4]*xu[0] + xu[4]

In [957]:
print(v[4])

-f(b)*Derivative(z(b), (b, 4))/z(b)**2 + 8*f(b)*Derivative(z(b), b)*Derivative(z(b), (b, 3))/z(b)**3 + 6*f(b)*Derivative(z(b), (b, 2))**2/z(b)**3 - 36*f(b)*Derivative(z(b), b)**2*Derivative(z(b), (b, 2))/z(b)**4 + 24*f(b)*Derivative(z(b), b)**4/z(b)**5 + Derivative(f(b), (b, 4))/z(b) - 4*Derivative(f(b), b)*Derivative(z(b), (b, 3))/z(b)**2 - 6*Derivative(f(b), (b, 2))*Derivative(z(b), (b, 2))/z(b)**2 - 4*Derivative(f(b), (b, 3))*Derivative(z(b), b)/z(b)**2 + 24*Derivative(f(b), b)*Derivative(z(b), b)*Derivative(z(b), (b, 2))/z(b)**3 + 12*Derivative(f(b), (b, 2))*Derivative(z(b), b)**2/z(b)**3 - 24*Derivative(f(b), b)*Derivative(z(b), b)**3/z(b)**4


In [953]:
v[4].subs(new_subs)

24*u[1]**4*xu[0] - 24*u[1]**3*xu[1] - 36*Derivative(-z(b)*u[1], b)*u[1]**2*xu[0]/z(b) + 24*Derivative(-z(b)*u[1], b)*u[1]*xu[1]/z(b) - 8*Derivative(-z(b)*u[1], (b, 2))*u[1]*xu[0]/z(b) + 4*Derivative(-z(b)*u[1], (b, 2))*xu[1]/z(b) - Derivative(-z(b)*u[1], (b, 3))*xu[0]/z(b) + 12*Derivative(-z(b)*xu[1], b)*u[1]**2/z(b) + 4*Derivative(-z(b)*xu[1], (b, 2))*u[1]/z(b) + Derivative(-z(b)*xu[1], (b, 3))/z(b) + 6*Derivative(-z(b)*u[1], b)**2*xu[0]/z(b)**2 - 6*Derivative(-z(b)*u[1], b)*Derivative(-z(b)*xu[1], b)/z(b)**2

In [954]:
ss = core.factory_coefs()

In [955]:
ss.exprs[4]

24*u[1]**4*xu[0] - 24*u[1]**3*xu[1] - 36*u[1]**2*u[2]*xu[0] + 12*u[1]**2*xu[2] + 24*u[1]*u[2]*xu[1] + 8*u[1]*u[3]*xu[0] - 4*u[1]*xu[3] + 6*u[2]**2*xu[0] - 6*u[2]*xu[2] - 4*u[3]*xu[1] - u[4]*xu[0] + xu[4]

In [934]:
list(zip(subs[4].keys(), subs[4].values()))

[(Derivative(f(b), (b, 4)), z(b)*xu[4]), (Derivative(z(b), (b, 4)), z(b)*u[4])]

In [928]:
v[4].subs(subs[4])

-f(b)*u[4]/z(b) + 8*f(b)*Derivative(z(b), b)*Derivative(z(b), (b, 3))/z(b)**3 + 6*f(b)*Derivative(z(b), (b, 2))**2/z(b)**3 - 36*f(b)*Derivative(z(b), b)**2*Derivative(z(b), (b, 2))/z(b)**4 + 24*f(b)*Derivative(z(b), b)**4/z(b)**5 + xu[4] - 4*Derivative(f(b), b)*Derivative(z(b), (b, 3))/z(b)**2 - 6*Derivative(f(b), (b, 2))*Derivative(z(b), (b, 2))/z(b)**2 - 4*Derivative(f(b), (b, 3))*Derivative(z(b), b)/z(b)**2 + 24*Derivative(f(b), b)*Derivative(z(b), b)*Derivative(z(b), (b, 2))/z(b)**3 + 12*Derivative(f(b), (b, 2))*Derivative(z(b), b)**2/z(b)**3 - 24*Derivative(f(b), b)*Derivative(z(b), b)**3/z(b)**4

In [None]:
core.gcached

In [981]:
class tmp:
    """Scratch class for trying out ``core.gcached(prop=False)`` on ``__getitem__``.

    Indexing prints 'call' and returns the square of the index.
    NOTE(review): the decorator presumably caches the result per index, so the
    print would show when the body really runs -- confirm against core.gcached.
    """

    @core.gcached(prop=False)
    def __getitem__(self, idx):
        print('call')
        squared = pow(idx, 2)
        return squared

    

In [982]:
t = tmp()

In [988]:
y = list(range(100))

In [991]:
y[:10][-1::-1]

[9, 8, 7, 6, 5, 4, 3, 2, 1, 0]

In [987]:
new_subs[-1::-1]

[(Derivative(z(b), (b, 4)), z(b)*u[4]),
 (Derivative(f(b), (b, 4)), z(b)*xu[4]),
 (Derivative(z(b), (b, 3)), -z(b)*u[3]),
 (Derivative(f(b), (b, 3)), -z(b)*xu[3]),
 (Derivative(z(b), (b, 2)), z(b)*u[2]),
 (Derivative(f(b), (b, 2)), z(b)*xu[2]),
 (Derivative(z(b), b), -z(b)*u[1]),
 (Derivative(f(b), b), -z(b)*xu[1]),
 (f(b), z(b)*xu[0])]

In [985]:
t[10]

100

In [830]:
from thermoextrap.utilities import buildAvgFuncs

#For quantities like the chemical potential, we're interested in the -log(<X>), not <X>
#Everything is the same, but we take derivatives differently
#Luckily, have closed-form expression for derivatives of -log(<X>) in terms of derivatives of <X>
#Specifically, d(n)[-log(<X>)]/dB(n) = Sum(k=1, n)[(k-1)! * (-1/<X>)^k * B(n,k,(d<X>/dB, ..., d(n-k+1)<X>/dB(n-k+1)))]
#B(n,k, (...)) represents Bell Polynomials, which are implemented in sympy (not numpy or scipy unfortunately)
#Create custom classes to handle this
from sympy import bell

class LogAvgExtrapModel(ExtrapModel):
    """Extrapolation model for -log(<X>) instead of <X>.

    Only the derivative calculation is overridden.  The derivatives of
    -log(<X>) with respect to beta are assembled from the derivatives of <X>
    using incomplete Bell polynomials (sympy ``bell``):

        d^n[-log(<X>)]/dB^n =
            sum_{k=1..n} (k-1)! * (-1/<X>)^k * B(n, k, (d<X>/dB, ..., d^(n-k+1)<X>/dB^(n-k+1)))
    """

    def calcDerivVals(self, refB, x, U):
        """Compute derivatives of -log(<X>) up to order ``self.maxOrder``.

        Parameters
        ----------
        refB :
            Not used by this method.
        x : array
            Observable data; indexed as ``x.shape[0]`` (samples) by
            ``x.shape[1]`` (observable components).
        U : array
            Potential energies; ``U.shape[0]`` must equal ``x.shape[0]``.

        Returns
        -------
        numpy.ndarray or None
            Array of shape ``(self.maxOrder + 1, x.shape[1])``; row ``o`` holds
            the order-``o`` derivative (row 0 is ``-log(<X>)`` itself).
            ``None`` is returned (after printing a message) when the first
            dimensions of ``x`` and ``U`` differ.
        """
        # np.math was only an alias of the stdlib math module; it was deprecated
        # in NumPy 1.25 and removed in NumPy 2.0, so use the stdlib directly.
        from math import factorial

        if x.shape[0] != U.shape[0]:
            print('First observable dimension (%i) and size of potential energy array (%i) do not match!'%(x.shape[0], U.shape[0]))
            return

        avgUfunc, avgXUfunc = buildAvgFuncs(x, U, self.maxOrder)
        nvals = x.shape[1]
        derivVals = np.zeros((self.maxOrder+1, nvals))

        # zeroth order: -log of the plain average
        xAvg = avgXUfunc(0)
        derivVals[0] = -np.log(xAvg)

        # Derivatives of <X> for orders 1..maxOrder, evaluated once up front;
        # row i-1 holds the order-i derivative.  They do not depend on (o, k).
        avgDerivs = np.array([self.derivF[i](avgUfunc, avgXUfunc)
                              for i in range(1, self.maxOrder+1)])

        for o in range(1, self.maxOrder+1):
            for k in range(1, o+1):
                prefac = factorial(k-1)
                # Bell polynomial B(o, k, ...) takes the first o-k+1 derivatives
                # Apply the chain rule to each element of the observable array
                for j in range(nvals):
                    derivVals[o, j] += prefac*((-1/xAvg[j])**k)*bell(o, k, avgDerivs[:o-k+1, j])

        return derivVals


In [835]:
betas

[0.1, 0.2, 0.3, 0.4]

In [887]:
xdata = xem.data.xv.values
udata = xem.data.uv.values
refBeta=0.5

In [836]:
#Create and train extrapolation model
extModelLog = LogAvgExtrapModel(maxOrder=4, refB=refBeta, 
                                xData=xdata,
                                uData=udata,
                                )

#Note that we handled the -log calculation in the definition of the derivatives (even at zeroth order).
#This means we want to just pass data, not the -log of the data.

#Check the parameters
print("Model parameters (derivatives):")
print(extModelLog.params)
print('\n')

#Finally, look at predictions
print("Model predictions:")
print(extModelLog.predict(betas, order=2))
print('\n')

#And bootstrapped uncertainties
print("Bootstrapped uncertainties in predictions:")
print(extModelLog.bootstrap(betas, order=2))


Model parameters (derivatives):
[[ 6.92326220e-01  6.95158101e-01  6.92383757e-01  6.94896673e-01
   6.90846049e-01]
 [ 2.69773834e-04 -2.27673014e-04  4.07282201e-04  1.07002690e-05
   2.20961626e-04]
 [ 4.48585482e-06 -6.47472658e-05  2.02998635e-05  1.12768749e-04
  -6.90717840e-05]
 [-5.07388591e-05  5.69296180e-05 -5.54391284e-05  9.44529753e-05
  -6.20967030e-05]
 [ 1.05400889e-05  2.10718898e-05 -1.32468610e-05 -4.42767814e-05
   1.91285621e-05]]


Model predictions:
[[0.69221867 0.69524399 0.69222247 0.69490141 0.69075214]
 [0.69224549 0.69522349 0.69226249 0.69489854 0.69077665]
 [0.69227236 0.69520234 0.69230271 0.69489679 0.69080048]
 [0.69229927 0.69518054 0.69234313 0.69489617 0.69082361]]


Bootstrapped uncertainties in predictions:
[[0.00178116 0.00166147 0.00190618 0.00173911 0.0020865 ]
 [0.00178175 0.00165808 0.00191183 0.00173188 0.00207618]
 [0.00178384 0.00165654 0.00191886 0.0017261  0.00206729]
 [0.00178743 0.00165686 0.00192723 0.00172178 0.00205984]]


In [886]:
reload(core)

<module 'thermoextrap.xtrapy.core' from '/Users/wpk/Documents/python/projects/xtrapy/thermoextrap/xtrapy/core.py'>

In [889]:
xem_log = core.ExtrapModel.from_values(order, refBeta, udata, xdata, minus_log=True)

In [896]:
xem_log.xcoefs(norm=False)# - extModelLog.params

In [897]:
extModelLog.params

array([[ 6.92326220e-01,  6.95158101e-01,  6.92383757e-01,
         6.94896673e-01,  6.90846049e-01],
       [ 2.69773834e-04, -2.27673014e-04,  4.07282201e-04,
         1.07002690e-05,  2.20961626e-04],
       [ 4.48585482e-06, -6.47472658e-05,  2.02998635e-05,
         1.12768749e-04, -6.90717840e-05],
       [-5.07388591e-05,  5.69296180e-05, -5.54391284e-05,
         9.44529753e-05, -6.20967030e-05],
       [ 1.05400889e-05,  2.10718898e-05, -1.32468610e-05,
        -4.42767814e-05,  1.91285621e-05]])

In [895]:
xem_log.predict(betas, order=4) - extModelLog.predict(betas, order=4)

In [881]:
s = core.factory_coefs(minus_log=True)

In [884]:
s.xcoefs(xem.data, xem.order, norm=False) - extModelLog.params

In [598]:
sp.bell(4, 2, X)

4*X[0]*X[2] + 3*X[1]**2