In [1]:
import thermoextrap.xtrapy.xpan_beta as xpan_beta
import xarray as xr
import numpy as np
import thermoextrap.xtrapy.core as xtrapy_core

In [None]:
def xr_test(a, b):
    """
    Assert that two xarray objects are close, ignoring dimension order.

    ``b`` is first transposed to the dimension order of ``a`` and the pair
    is then handed to ``xr.testing.assert_allclose``.
    """
    b_aligned = b.transpose(*a.dims)
    xr.testing.assert_allclose(a, b_aligned)

In [140]:
# synthetic data
# pretending data for rdf(pair, r)
# here, pair is for a-a, a-b, b-b
shape = (20, 3, 5)
dims = ['rec', 'pair', 'position']

# Seed the global NumPy RNG so that Restart Kernel -> Run All regenerates
# exactly the same synthetic arrays (and therefore the same outputs below).
SEED = 0
np.random.seed(SEED)

# one extrapolation model per reference beta
xems = []
for beta in [0.1, 10.0]:
    # observable, shape (rec, pair, position)
    x = xr.DataArray(np.random.rand(*shape), dims=dims)
    # one scalar per record, sharing the 'rec' dimension with x
    u = xr.DataArray(np.random.rand(shape[0]), dims=dims[0])
    data = xpan_beta.DataCentralMomentsVals.from_vals(x, u, order=3, central=True)
    xems.append(xpan_beta.factory_extrapmodel(beta, data))

In [70]:
def stack_xr_for_gp(da, xdims, ydims=None,
                    xname='xstack',
                    yname='ystack',
                    policy='raise'):
    """
    Stack the dimensions of an xarray.DataArray into an "x" and a "y" axis.

    Parameters
    ----------
    da : xarray.DataArray
        array to stack
    xdims : str or sequence of str
        dimensions to stack, in the given order, into the new dimension
        `xname`
    ydims : str or sequence of str, optional
        dimensions to stack into the new dimension `yname`.  If not passed,
        use all remaining dimensions of `da`, in the order in which they
        appear in ``da.dims``
    xname, yname : str
        names of the new stacked dimensions.  Neither may already be a
        dimension of `da`
    policy : {'raise', 'infer'}
        policy if coordinates are not available.  With 'raise', every entry
        of `xdims` must be present in ``da.coords``.  With any other value no
        check is made and `da` is stacked as is

    Returns
    -------
    out : xarray.DataArray
        result of ``da.stack(...)``.  The `yname` dimension is only created
        when there is at least one dimension to put in it

    Raises
    ------
    ValueError
        if `xname` or `yname` is already a dimension of `da`, or if
        ``policy == 'raise'`` and a dimension in `xdims` has no coordinate
    """
    dims = da.dims
    for name in (xname, yname):
        if name in dims:
            # report the name that actually clashes, not always xname
            raise ValueError('{} conflicts with existing {}'.format(name, dims))

    xdims = (xdims,) if isinstance(xdims, str) else tuple(xdims)

    if ydims is None:
        # keep the order of da.dims; a set difference would make the
        # stacking order (and hence the layout of the result) arbitrary
        ydims = tuple(dim for dim in dims if dim not in xdims)
    elif isinstance(ydims, str):
        ydims = (ydims,)
    else:
        ydims = tuple(ydims)

    if policy == 'raise':
        for dim in xdims:
            if dim not in da.coords:
                raise ValueError('da.coords[{}] not set'.format(dim))

    stacker = {xname: xdims}
    if len(ydims) > 0:
        stacker[yname] = ydims

    return da.stack(**stacker)


def get_x_from_stack(stack, xname='xstack'):
    """
    Return the entries of ``stack[xname].values`` as a numpy array.

    Each entry becomes one row of the result, so a stacked index of
    tuples yields a 2D array with one column per stacked level.
    """
    entries = stack[xname].values
    return np.array([entry for entry in entries])


import pandas as pd
def to_ave_var(da, dim_reduce, dim_concat=None, concat_kws=None, reduce_kws=None):
    """
    Reduce `da` along `dim_reduce` by mean and by variance and join the two.

    Parameters
    ----------
    da : object with ``mean`` and ``var`` reductions (e.g. a DataArray)
    dim_reduce : dimension(s) passed to ``da.mean`` and ``da.var``
    dim_concat : optional
        passed as ``dim`` to ``xr.concat``.  Defaults to a pandas Index
        named 'variable' with labels 'mean' and 'var'
    concat_kws : dict, optional
        extra keyword arguments for ``xr.concat``
    reduce_kws : dict, optional
        extra keyword arguments for both reductions

    Returns
    -------
    result of ``xr.concat`` on (mean, var), in that order
    """
    reduce_opts = {} if reduce_kws is None else reduce_kws
    concat_opts = {} if concat_kws is None else concat_kws
    concat_dim = (
        dim_concat
        if dim_concat is not None
        else pd.Index(['mean', 'var'], name='variable')
    )

    # mean first, then variance, matching the labels of the default index
    stats = tuple(
        getattr(da, reducer)(dim_reduce, **reduce_opts)
        for reducer in ('mean', 'var')
    )
    return xr.concat(stats, dim=concat_dim, **concat_opts)
    
    
    

In [69]:
# Bundle the extrapolation models collected in `xems` into one StateCollection.
s = xtrapy_core.StateCollection(xems)
# Coefficients of the first model, requested with norm=False; the next cell
# stacks this array over its 'a', 'b' and 'order' dimensions.
Y = xems[0].xcoefs(norm=False)

In [71]:
# stack.values is Y[N, P]
# 'a', 'b' and 'order' are stacked into 'xstack'; since ydims is not given,
# every remaining dimension of Y goes into 'ystack'.
# policy='infer' skips the check that each x-dimension has a coordinate on Y.
stack = stack_xr_for_gp(Y, ('a','b', 'order'), policy='infer')

In [77]:
# this is X[N, D]
# One row per 'xstack' entry; the columns follow the stacking order
# ('a', 'b', 'order').
get_x_from_stack(stack)

array([[0, 0, 0],
       [0, 0, 1],
       [0, 0, 2],
       [0, 0, 3],
       [0, 1, 0],
       [0, 1, 1],
       [0, 1, 2],
       [0, 1, 3]])

In [78]:
# loop over "P"
# Grouping by 'ystack' yields one (label, sub-array) pair per stacked y entry;
# each sub-array runs along 'xstack' only.
for ymeta, g in stack.groupby('ystack'):
    print(ymeta, g)

(0,) <xarray.DataArray (xstack: 8)>
array([ 5.09886627e-01,  9.49579876e-04, -4.00989523e-03,  2.37580532e-04,
        5.08380876e-01, -9.40720364e-03, -2.88294278e-03,  2.19793584e-03])
Coordinates:
  * xstack   (xstack) MultiIndex
  - a        (xstack) int64 0 0 0 0 0 0 0 0
  - b        (xstack) int64 0 0 0 0 1 1 1 1
  - order    (xstack) int64 0 1 2 3 0 1 2 3
    ystack   object (0,)
(1,) <xarray.DataArray (xstack: 8)>
array([ 0.46268571, -0.01036135,  0.0010101 ,  0.00184783,  0.54285313,
       -0.00447586,  0.00179578,  0.00098901])
Coordinates:
  * xstack   (xstack) MultiIndex
  - a        (xstack) int64 0 0 0 0 0 0 0 0
  - b        (xstack) int64 0 0 0 0 1 1 1 1
  - order    (xstack) int64 0 1 2 3 0 1 2 3
    ystack   object (1,)
(2,) <xarray.DataArray (xstack: 8)>
array([ 5.02188288e-01, -7.41753312e-03,  2.62539031e-04,  5.93356094e-04,
        4.94726464e-01,  1.41507940e-05, -7.01443938e-04, -3.73269814e-04])
Coordinates:
  * xstack   (xstack) MultiIndex
  - a        (xstac

In [81]:
# toy example for gpflow stuff
# NOTE(review): repeats the `s = ...` assignment made in an earlier cell.
s = xtrapy_core.StateCollection(xems)

In [112]:
import pandas as pd
# NOTE(review): this duplicates the `to_ave_var` defined in an earlier cell;
# when both cells run, this later definition shadows the earlier one.
def to_ave_var(da, dim_reduce, dim_concat=None, concat_kws=None, reduce_kws=None):
    """
    Reduce `da` along `dim_reduce` by mean and by variance and join the two.

    Parameters
    ----------
    da : object with ``mean`` and ``var`` reductions (e.g. a DataArray)
    dim_reduce : dimension(s) passed to ``da.mean`` and ``da.var``
    dim_concat : optional
        passed as ``dim`` to ``xr.concat``.  Defaults to a pandas Index
        named 'variable' with labels 'mean' and 'var'
    concat_kws : dict, optional
        extra keyword arguments for ``xr.concat``
    reduce_kws : dict, optional
        extra keyword arguments for both reductions

    Returns
    -------
    result of ``xr.concat`` on (mean, var), in that order
    """
    reduce_opts = {} if reduce_kws is None else reduce_kws
    concat_opts = {} if concat_kws is None else concat_kws
    concat_dim = (
        dim_concat
        if dim_concat is not None
        else pd.Index(['mean', 'var'], name='variable')
    )

    # mean first, then variance, matching the labels of the default index
    stats = tuple(
        getattr(da, reducer)(dim_reduce, **reduce_opts)
        for reducer in ('mean', 'var')
    )
    return xr.concat(stats, dim=concat_dim, **concat_opts)
    

In [129]:
# resample the object to take mean/var
# nrep=100 is passed to resample; the next cell reduces over a 'rep' dimension,
# presumably the one created here — confirm against StateCollection.resample.
sr = s.resample(nrep=100)

In [136]:
# For every resampled state: take the un-normalized coefficients, reduce them
# over 'rep' to mean/variance, then concatenate the states along a new
# dimension labelled by sr.alpha0 and named sr.alpha_name.
Y = xr.concat(
    (
        state.xcoefs(norm=False).pipe(to_ave_var, 'rep')
        for state in sr
    ),
    dim=pd.Index(sr.alpha0, name=sr.alpha_name),
)

In [139]:
# Dimension names of Y in order: the concatenated 'beta' axis first, then the
# 'variable' (mean/var) axis, followed by the coefficient dimensions.
Y.dims

('beta', 'variable', 'order', 'a', 'b', 'c')

In [None]:
# Stack 'beta', 'a' and 'b' into 'xstack'; all other dims of Y go to 'ystack'.
# The default policy='raise' applies here, so this raises ValueError unless Y
# has a coordinate for each of 'beta', 'a' and 'b' (the earlier call used
# policy='infer' instead).
stack_xr_for_gp(Y, xdims=['beta','a','b'])