In [1]:
import numpy as np


# xtrapy stuff:
# note, just using single module right now.  Can retinker
import thermoextrap.xtrapy.core as core
import thermoextrap.xtrapy.data as data
import thermoextrap.xtrapy.xpan_beta as xpan_beta

import xarray as xr
from importlib import reload
import cmomy.xcentral as xcentral

In [10]:
def xr_test(a, b):
    """Assert that ``a`` and ``b`` agree within tolerance, ignoring dimension order.

    ``b`` is first transposed to the dimension order of ``a``; the comparison
    itself is done by ``xr.testing.assert_allclose``.
    """
    reordered = b.transpose(*a.dims)
    xr.testing.assert_allclose(a, reordered)

def cdata_test(cdata, xdata, max_order=None):
    """Check the central-moment quantities of ``xdata`` against ``cdata``.

    Parameters
    ----------
    cdata : object
        Reference object exposing ``dxdu``, ``du``, ``xave``, ``du_selector``
        and ``dxdu_selector``.
    xdata : object
        Object under test exposing the same attributes.  Its ``du`` quantities
        are indexed with ``isel(val=0)`` before being compared.
    max_order : int, optional
        Number of selector entries (``0 .. max_order - 1``) to compare.  When
        omitted, the notebook-level ``order`` is used, so existing two-argument
        calls behave exactly as before.

    Raises
    ------
    AssertionError
        Propagated from ``xr_test`` when a pair of arrays differs.
    """
    # central test
    xr_test(cdata.dxdu, xdata.dxdu)
    xr_test(cdata.du, xdata.du.isel(val=0))
    xr_test(cdata.xave, xdata.xave)

    # Prefer the explicit argument; fall back to the global only when it is not
    # given, so the helper no longer depends on a later cell having been run.
    n_terms = order if max_order is None else max_order
    for i in range(n_terms):
        xr_test(b=xdata.du_selector[i].isel(val=0), a=cdata.du_selector[i])
        xr_test(b=xdata.dxdu_selector[i], a=cdata.dxdu_selector[i])

def rdata_test(rdata, xdata, max_order=None):
    """Check the raw-moment quantities of ``xdata`` against ``rdata``.

    Parameters
    ----------
    rdata : object
        Reference object exposing ``xu``, ``u``, ``u_selector`` and
        ``xu_selector``.
    xdata : object
        Object under test exposing the same attributes.  Its ``u`` quantities
        are indexed with ``isel(val=0)`` before being compared.
    max_order : int, optional
        Number of selector entries (``0 .. max_order - 1``) to compare.  When
        omitted, the notebook-level ``order`` is used, so existing two-argument
        calls behave exactly as before.

    Raises
    ------
    AssertionError
        Propagated from ``xr_test`` when a pair of arrays differs.
    """
    # raw test
    xr_test(b=xdata.xu, a=rdata.xu)
    xr_test(rdata.u, xdata.u.isel(val=0))

    # Prefer the explicit argument; fall back to the global only when it is not
    # given, so the helper no longer depends on a later cell having been run.
    n_terms = order if max_order is None else max_order
    for i in range(n_terms):
        xr_test(b=xdata.u_selector[i].isel(val=0), a=rdata.u_selector[i])
        xr_test(b=xdata.xu_selector[i], a=rdata.xu_selector[i])

In [70]:
# --- synthetic test data ---
n = 10_000  # number of samples
nv = 5  # number of x values per sample

# constant shifts added to the uniform draws
_u_offset = 0.0
_x_offset = 0.0

# Fixed seed: the draw order (u, x, ub, xb) determines every value below.
np.random.seed(0)
u = _u_offset + np.random.rand(n)
x = _x_offset + np.random.rand(n, nv)

# second set of draws, for testing
ub = _u_offset + np.random.rand(n)
xb = _x_offset + np.random.rand(n, nv)

# passed as ``order=`` to the data constructors and used as the loop bound
# in cdata_test / rdata_test
order = 6

In [71]:
# Reference objects from the older xpan_beta interface:
# rdata is what rdata_test compares against, cdata (central=True) is what
# cdata_test compares against.
rdata = xpan_beta.factory_data(
    u,
    x,
    order=order,
    chunk=1000,
)
cdata = xpan_beta.factory_data(
    u,
    x,
    order=order,
    chunk=1000,
    central=True,
)

In [72]:
# Display the class object (its repr shows the fully qualified module path).
data.DataCentralMomentsVals

thermoextrap.xtrapy.data.DataCentralMomentsVals

In [73]:
# DataCentralMoments is based on multiple averages
# DataCentralMomentsVals is based on multiple raw values

In [74]:
# Build the moments object directly from the raw samples.
xdata = data.DataCentralMoments.from_vals(
    xv=x,
    uv=u,
    order=order,
    # this is the dimension of anything other than rec
    dims=["val"],
)

# it has to agree with both reference objects
rdata_test(rdata, xdata)
cdata_test(cdata, xdata)

In [75]:
# Same inputs as for xdata, but through the *Vals class.
xdata_vals = data.DataCentralMomentsVals.from_vals(
    xv=x,
    uv=u,
    order=order,
    # this is the dimension of anything other than rec
    dims=["val"],
)

# it has to agree with both reference objects
rdata_test(rdata, xdata_vals)
cdata_test(cdata, xdata_vals)

In [78]:
# Construct again, this time from the samples stored on rdata and without
# passing ``dims``.
o = data.DataCentralMomentsVals.from_vals(
    xv=rdata.xv,
    uv=rdata.uv,
    order=order,
)

In [80]:
# Difference between the two constructions; displayed as the cell output.
o.values - xdata_vals.values

In [47]:
# xdata_vals holds onto the underlying x, u observations,
# so resampling is across these observations.
# xdata holds just the averaged data;
# if there are multiple averages, one can resample along that dimension.

In [49]:
# Inspect xdata.dxduave; its repr is the cell output.
xdata.dxduave

<CentralMoments(val_shape=(5,), mom=(1, 6))>
<xarray.DataArray (val: 5, mom_x: 2, mom_u: 7)>
array([[[ 1.00000000e+04,  4.96458892e-01,  8.38630072e-02,
          2.69903711e-04,  1.26427586e-02,  9.07335583e-05,
          2.26382617e-03],
        [ 4.98283036e-01,  7.52744092e-04,  5.35937191e-05,
          1.99085775e-04,  1.43465962e-05,  4.78314312e-05,
          2.96161501e-06]],

       [[ 1.00000000e+04,  4.96458892e-01,  8.38630072e-02,
          2.69903711e-04,  1.26427586e-02,  9.07335583e-05,
          2.26382617e-03],
        [ 5.00173539e-01,  1.26470171e-04,  3.61504378e-05,
         -5.11347640e-07,  1.30578256e-05, -1.87593402e-06,
          2.70531028e-06]],

       [[ 1.00000000e+04,  4.96458892e-01,  8.38630072e-02,
          2.69903711e-04,  1.26427586e-02,  9.07335583e-05,
          2.26382617e-03],
        [ 5.01300428e-01, -3.87593927e-04,  3.17036655e-04,
         -8.47050150e-05,  7.29172110e-05, -2.32156705e-05,
          1.48795559e-05]],

       [[ 1.0000000

In [62]:
# resample along 'val'
ndat = xdata.dxduave.shape[0]
nrep = 20
# (nrep, ndat) integer indices in [0, ndat), drawn with replacement
idx = np.random.choice(ndat, size=(nrep, ndat), replace=True)
xdata_r = xdata.resample(indices=idx, axis="val")
xdata_r.dxduave

<CentralMoments(val_shape=(20,), mom=(1, 6))>
<xarray.DataArray (rep: 20, mom_x: 2, mom_u: 7)>
array([[[ 5.00000000e+04,  4.96458892e-01,  8.38630072e-02,
          2.69903711e-04,  1.26427586e-02,  9.07335583e-05,
          2.26382617e-03],
        [ 4.98950330e-01,  4.13269943e-04,  8.81777034e-05,
          9.28957691e-05,  2.24901790e-05,  1.94237881e-05,
          4.65040735e-06]],

       [[ 5.00000000e+04,  4.96458892e-01,  8.38630072e-02,
          2.69903711e-04,  1.26427586e-02,  9.07335583e-05,
          2.26382617e-03],
        [ 4.98341049e-01,  5.45296203e-04,  4.32153590e-05,
          8.95507275e-05,  1.18524319e-05,  1.66550115e-05,
          2.60357692e-06]],

       [[ 5.00000000e+04,  4.96458892e-01,  8.38630072e-02,
          2.69903711e-04,  1.26427586e-02,  9.07335583e-05,
          2.26382617e-03],
        [ 4.98321759e-01,  4.40966421e-04,  5.89458102e-05,
          7.38709227e-05,  1.58646068e-05,  1.09102886e-05,
          3.36333757e-06]],
...
       [[ 5.00

In [66]:
# Length of the 'rec' dimension of the stored x values.
xdata_vals.xv.sizes['rec']

10000

In [69]:
# In contrast, xdata_vals is resampled over the original observations:
# the indices run over the 'rec' dimension of the stored x values.
ndat = xdata_vals.xv.sizes["rec"]
nrep = 20
# (nrep, ndat) integer indices in [0, ndat), drawn with replacement
idx = np.random.choice(ndat, size=(nrep, ndat), replace=True)
xdata_vals_resamp = xdata_vals.resample(indices=idx)
xdata_vals_resamp.dxduave

<CentralMoments(val_shape=(20, 5), mom=(1, 6))>
<xarray.DataArray 'x' (rep: 20, val: 5, mom_x: 2, mom_u: 7)>
array([[[[ 1.00000000e+04,  4.97118585e-01,  8.51138619e-02, ...,
           1.29693457e-02,  3.77389496e-05,  2.33730700e-03],
         [ 4.96151836e-01,  1.09793640e-03,  2.08543078e-04, ...,
           6.24102693e-05,  4.24870065e-05,  1.61402174e-05]],

        [[ 1.00000000e+04,  4.97118585e-01,  8.51138619e-02, ...,
           1.29693457e-02,  3.77389496e-05,  2.33730700e-03],
         [ 5.01996313e-01,  7.30186363e-05, -2.63890046e-05, ...,
          -2.74999322e-06,  2.07230591e-06, -2.93534418e-07]],

        [[ 1.00000000e+04,  4.97118585e-01,  8.51138619e-02, ...,
           1.29693457e-02,  3.77389496e-05,  2.33730700e-03],
         [ 4.98113723e-01, -7.70931032e-04,  3.30187184e-05, ...,
          -5.49813314e-06, -4.67486978e-05, -3.35086671e-06]],

        [[ 1.00000000e+04,  4.97118585e-01,  8.51138619e-02, ...,
           1.29693457e-02,  3.77389496e-05,  2.3373

In [17]:
# resampling

# one shared set of indices (nrep=100) used by all three routes below
idx = data.resample_indicies(len(cdata), nrep=100)

# a: resample the reference central data object
a = cdata.resample(indices=idx)

# b: resample while constructing, from the raw arrays
b = data.DataCentralMoments.from_resample_vals(
    xv=x,
    uv=u,
    order=order,
    indices=idx,
    parallel=True,
    dims=["val"],
)

# c: same as b, but from the arrays stored on cdata
c = data.DataCentralMoments.from_resample_vals(
    xv=cdata.xv,
    uv=cdata.uv,
    order=order,
    indices=idx,
    parallel=True,
    dims=["val"],
)

In [20]:
# Both from_resample_vals results (b, c) are checked against the resampled
# reference data a.
cdata_test(a, b)
cdata_test(a, c)

In [18]:
# Single-shot timings (-n 1 -r 1: one loop, one run) of the three resampling routes.
%timeit -n 1 -r 1 a = cdata.resample(indices=idx).dxdu
%timeit -n 1 -r 1 b=data.DataCentralMoments.from_resample_vals(xv=x, uv=u, order=order, indices=idx, parallel=True, dims=['val'])
%timeit -n 1 -r 1 c=data.DataCentralMoments.from_resample_vals(xv=cdata.xv, uv=cdata.uv, order=order, indices=idx, parallel=True, dims=['val'])

254 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
148 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
166 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [22]:
# from average values

# (1) plain numpy inputs: axis and dims are passed explicitly
#     (mom_u is not passed here)
xdata = data.DataCentralMoments.from_ave_raw(
    u=rdata.u.values,
    xu=rdata.xu.values,
    w=len(rdata.uv),
    axis=0,
    dims=["val"],
)

rdata_test(rdata, xdata)
cdata_test(cdata, xdata)

# (2) with xr: DataArray inputs, with the u-moment dimension named
xdata = data.DataCentralMoments.from_ave_raw(
    u=rdata.u,
    xu=rdata.xu,
    w=len(rdata.uv),
    mom_u="mom_u",
)

rdata_test(rdata, xdata)
cdata_test(cdata, xdata)

In [23]:
# from central aves

# (1) plain numpy inputs: axis and dims are passed explicitly;
#     uave is entry 1 of rdata.u
xdata = data.DataCentralMoments.from_ave_central(
    du=cdata.du.values,
    dxdu=cdata.dxdu.values,
    xave=cdata.xave.values,
    uave=rdata.u.values[1],
    w=len(rdata.uv),
    axis=0,
    dims=["val"],
)
rdata_test(rdata, xdata)
cdata_test(cdata, xdata)

# (2) DataArray inputs; uave is selected by label (mom_u=1)
xdata = data.DataCentralMoments.from_ave_central(
    du=cdata.du,
    dxdu=cdata.dxdu,
    xave=cdata.xave,
    uave=rdata.u.sel(mom_u=1),
    w=len(rdata.uv),
)

rdata_test(rdata, xdata)
cdata_test(cdata, xdata)


In [28]:
# Rebuild xdata from its own dxduave array: the mom_x=0 slice is passed as du,
# the mom_x=1 slice as dxdu.
# NOTE: this cell rebinds xdata from its previous value, so re-running it
# starts from the already rebuilt object.
xdata = data.DataCentralMoments.from_ave_central(
    du=xdata.dxduave.values.sel(mom_x=0),
    dxdu=xdata.dxduave.values.sel(mom_x=1),
)
rdata_test(rdata, xdata)
cdata_test(cdata, xdata)


In [29]:
# resampling

In [30]:
# --- synthetic test data (smaller sample than before) ---
n = 1_000  # number of samples
nv = 5  # number of x values per sample

# constant shifts added to the uniform draws
_u_offset = 0.0
_x_offset = 0.0

# Fixed seed: the draw order (u, x, ub, xb) determines every value below.
np.random.seed(0)
u = _u_offset + np.random.rand(n)
x = _x_offset + np.random.rand(n, nv)

# second set of draws, for testing
ub = _u_offset + np.random.rand(n)
xb = _x_offset + np.random.rand(n, nv)

In [31]:
# Reference objects (older interface) for the smaller data set.
rdata_base = xpan_beta.factory_data(
    u,
    x,
    order=order,
    chunk=1000,
)
cdata_base = xpan_beta.factory_data(
    u,
    x,
    order=order,
    chunk=1000,
    central=True,
)

# x and u each get a new leading length-1 axis (u also a trailing one);
# axis=0 and dims=['rec', 'val'] are passed explicitly.
xdata_base = data.DataCentralMoments.from_vals(
    xv=x[None, ...],
    uv=u[None, ..., None],
    axis=0,
    order=order,
    dims=["rec", "val"],
)

In [32]:
# From plain numpy arrays.
xdata_base2 = data.DataCentralMomentsVals.from_vals(
    xv=x,
    uv=u,
    order=order,
)

# From labelled DataArrays; x is transposed so that 'val' comes first.
xdata_base3 = data.DataCentralMomentsVals.from_vals(
    xv=xr.DataArray(x, dims=["rec", "val"]).transpose("val", ...),
    uv=xr.DataArray(u, dims=["rec"]),
    order=order,
)


In [33]:
# Resampling indices over the n samples; the second positional argument is
# presumably nrep (the earlier call passed nrep=100 by keyword) -- TODO confirm.
idx = data.resample_indicies(n, 100)

In [34]:
# Resample both reference objects with the shared indices (nrep=None, since
# the indices are given explicitly).
cdata = cdata_base.resample(nrep=None, indices=idx)
rdata = rdata_base.resample(nrep=None, indices=idx)

In [35]:
# Resample the three new-style objects with the same indices as the references.
xdata = xdata_base.resample(indices=idx, parallel=True)
xdata2 = xdata_base2.resample(indices=idx)
xdata3 = xdata_base3.resample(indices=idx)

In [38]:
# Every resampled new-style object has to match both resampled references.
for xd in (xdata, xdata2, xdata3):
    cdata_test(cdata, xd)
    rdata_test(rdata, xd)