In [1]:
import thermoextrap.xtrapy.xpan_beta as xpan_beta
import xarray as xr
import numpy as np
import thermoextrap.xtrapy.core as xtrapy_core

In [2]:
def stack_xr_for_gp(da, xdims, ydims=None,
                    xname='xstack',
                    yname='ystack',
                    variable=None,
                    policy='raise'):
    """
    Stack dimensions of an xarray.DataArray into feature/target dimensions.

    Parameters
    ----------
    da : xarray.DataArray
        array to stack
    xdims : str or sequence of str
        dimensions stacked, in the order given, into the new dimension `xname`
    ydims : str or sequence of str, optional
        dimensions stacked into the new dimension `yname`.  If not passed,
        use all remaining dimensions, in the order they appear in `da.dims`,
        excluding any dimension listed in `variable`.
    xname, yname : str
        names of the new stacked dimensions.  Neither may already be a
        dimension of `da`.
    variable : str or sequence of str, optional
        use this to indicate dimension(s) that contain variables
        (e.g. mean and variance).  These dimensions are moved to the last
        position(s) of the output.
    policy : {'raise', 'infer'}
        with 'raise', every dimension in `xdims` must have a coordinate in
        `da.coords`.  Any other value skips this check.

    Returns
    -------
    out : xarray.DataArray
        `da` with `xdims` stacked into `xname` and, if non-empty, `ydims`
        stacked into `yname`.

    Raises
    ------
    ValueError
        if `xname` or `yname` is already a dimension of `da`, or if
        `policy == 'raise'` and a dimension in `xdims` has no coordinate.
    """
    dims = da.dims
    for name in (xname, yname):
        if name in dims:
            # report the name that actually clashes (not always `xname`)
            raise ValueError('{} conflicts with existing {}'.format(name, dims))

    xdims = (xdims,) if isinstance(xdims, str) else tuple(xdims)

    if variable is not None:
        variable = (variable,) if isinstance(variable, str) else tuple(variable)

    if isinstance(ydims, str):
        ydims = (ydims,)
    elif ydims is None:
        # Keep the order of `da.dims` (a set would make the stacking order
        # arbitrary) and leave the variable dimension(s) unstacked so they
        # can still be transposed to the end below.
        skip = set(xdims)
        if variable is not None:
            skip.update(variable)
        ydims = tuple(dim for dim in dims if dim not in skip)
    else:
        ydims = tuple(ydims)

    stacker = {xname: xdims}
    if len(ydims) > 0:
        stacker[yname] = ydims

    if policy == 'raise':
        for dim in xdims:
            if dim not in da.coords:
                raise ValueError('da.coords[{}] not set'.format(dim))

    out = da.stack(**stacker)

    if variable is not None:
        out = out.transpose(..., *variable)

    return out

def wrap_like(x, da):
    """
    Wrap an array `x` in a new DataArray carrying the metadata of `da`.

    Parameters
    ----------
    x : array-like
        data for the new array
    da : xarray.DataArray
        template supplying dims, coords, attrs and name

    Returns
    -------
    xarray.DataArray
        new array built from `x` with `da.dims`, `da.coords`, `da.attrs`
        and `da.name`
    """
    # `indexes=` is documented as internal-only in the DataArray constructor
    # and the indexes are derived from `coords`, so it is not passed here.
    return xr.DataArray(x, dims=da.dims, coords=da.coords, attrs=da.attrs, name=da.name)


def get_x_from_stack(stack, xname='xstack'):
    """
    Return the values stored under `stack[xname]` as a numpy array.

    Parameters
    ----------
    stack : mapping-like
        object indexable by `xname` whose entry exposes `.values`
    xname : str
        key of the stacked X coordinate

    Returns
    -------
    numpy.ndarray
        array built from the list of entries in `stack[xname].values`
    """
    entries = [entry for entry in stack[xname].values]
    return np.array(entries)



import pandas as pd
def to_ave_var(da, dim_reduce, dim_concat=None, concat_kws=None, reduce_kws=None):
    """
    Reduce `da` along `dim_reduce` by mean and variance and concatenate both.

    Parameters
    ----------
    da : xarray.DataArray
        array to reduce
    dim_reduce : str
        dimension passed to `da.mean` and `da.var`
    dim_concat : optional
        passed as `dim` to `xr.concat`.  Defaults to a pandas Index named
        'variable' with entries 'mean' and 'var'.
    concat_kws : dict, optional
        extra keyword arguments for `xr.concat`
    reduce_kws : dict, optional
        extra keyword arguments for both `da.mean` and `da.var`

    Returns
    -------
    output of `xr.concat` over (mean, variance), in that order
    """
    reduce_opts = {} if reduce_kws is None else reduce_kws
    concat_opts = {} if concat_kws is None else concat_kws

    if dim_concat is None:
        dim_concat = pd.Index(['mean','var'], name='variable')

    average = da.mean(dim_reduce, **reduce_opts)
    variance = da.var(dim_reduce, **reduce_opts)
    return xr.concat((average, variance), dim=dim_concat, **concat_opts)
    
    
    

In [55]:
# synthetic data
# pretending data for rdf(pair, r)
# NOTE(review): this comment originally said "pair is for a-a, a-b, b-b" (three pairs),
# but `shape` and `coords` below define only two pair labels ('a', 'b') -- confirm intent.
# NOTE(review): no random seed is set, so the generated values differ on every run.
shape = (3, 2, 4)
dims = ['rec', 'pair', 'position']

coords = {'position': np.linspace(0, 2, shape[-1]), 'pair': ['a','b']}

# Build one extrapolation model per beta value and collect them.
xems = []
for beta in [0.1, 10.0]:
    # x: random array with dims (rec, pair, position); u: random array along 'rec'
    x = xr.DataArray(np.random.rand(*shape), dims=dims, coords=coords)
    u = xr.DataArray(np.random.rand(shape[0]), dims=dims[0])
    data = xpan_beta.DataCentralMomentsVals.from_vals(x, u, order=3, central=True)
    xems.append(xpan_beta.factory_extrapmodel(beta, data))

# NOTE: `x`, `u`, `data` and `beta` stay bound to the last loop iteration after this cell.
s = xtrapy_core.StateCollection(xems)

In [100]:
# resample the object to take mean/var
sr = s.resample(nrep=100)
# create Y data to consider:
# for each resampled state, reduce its unnormalized coefficients over 'rep' to
# mean/variance (to_ave_var), then concatenate the states along a new dimension
# labelled by sr.alpha0 and named sr.alpha_name.
Y_unstack = xr.concat(
    (x.xcoefs(norm=False).pipe(to_ave_var, 'rep') for x in sr), 
    dim=pd.Index(sr.alpha0, name=sr.alpha_name))

In [106]:
from importlib import reload

In [119]:
# Import the packaged stacking helpers and reload the module so that edits made
# to it during this session are picked up.
# NOTE(review): `%autoreload 2` in a configuration cell would make the manual reload unnecessary.
import thermoextrap.xtrapy.xstack as xstack
reload(xstack)

<module 'thermoextrap.xtrapy.xstack' from '/Users/wpk/Documents/python/projects/thermodynamic-extrapolation/thermoextrap/xtrapy/xstack.py'>

In [120]:
# Same workflow as the notebook-local helpers, using the packaged xstack functions:
# concatenate coefficients of all states, reduce 'rep' to mean/var, then stack.
out = (
    xstack.states_xcoefs_concat(sr)
    .pipe(xstack.to_mean_var, 'rep')
    .pipe(xstack.stack_dataarray, xdims=['beta','order'], vdim='variable')
)

In [125]:
# Pass the 'xstack' index of the stacked result to xstack.multiindex_to_array and keep the result as X.
X = xstack.multiindex_to_array(out.indexes['xstack'])

In [95]:
def _states():
    """
    Build a small resampled state collection from random data for tests.

    Two models are created (beta = 0.1 and 10.0), each from a random array
    with dims ('rec', 'pair', 'position') and a random array along 'rec'.
    The collection is resampled with nrep=3 and returned.
    """
    shape = (3, 2, 4)
    dims = ['rec', 'pair', 'position']
    coords = {'position': np.linspace(0, 2, shape[-1])}

    def _model_at(beta):
        # draw order matters for the random stream: x values first, then u values
        xvals = xr.DataArray(np.random.rand(*shape), dims=dims, coords=coords)
        uvals = xr.DataArray(np.random.rand(shape[0]), dims=dims[0])
        moments = xpan_beta.DataCentralMomentsVals.from_vals(xvals, uvals, order=3, central=True)
        return xpan_beta.factory_extrapmodel(beta, moments)

    models = [_model_at(beta) for beta in [0.1, 10.0]]
    collection = xtrapy_core.StateCollection(models)

    return collection.resample(nrep=3)


def test_mean_var():
    """
    Check that xstack.to_mean_var matches direct mean/var reductions over 'rep'.

    Uses the unnormalized coefficients of the first state from `_states()`.
    """
    coefs = _states()[0].xcoefs(norm=False)
    combined = xstack.to_mean_var(coefs, dim='rep')

    # compare each entry of the 'variable' dimension with the direct reduction
    for label, reducer in (('mean', coefs.mean), ('var', coefs.var)):
        xr.testing.assert_allclose(combined.sel(variable=label, drop=True), reducer('rep'))


In [96]:
# Run the check defined above; it completes silently when the comparisons pass.
test_mean_var()

In [92]:
# NOTE(review): depends on kernel state. In file order, `out` is the stacked result of the
# xstack pipeline cell and `x` is the last raw array from the synthetic-data loop, neither
# of which is what test_mean_var compares -- presumably run against other values. Confirm or delete.
xr.testing.assert_allclose(out.sel(variable='mean', drop=True), x.mean('rep'))

In [80]:
# 'mean' entry of xstack.to_mean_var for the first resampled state's unnormalized coefficients.
a = xstack.to_mean_var(sr[0].xcoefs(norm=False), dim='rep').sel(variable='mean', drop=True)

In [81]:
# Reference value: direct mean over 'rep' of the same coefficients.
b = sr[0].xcoefs(norm=False).mean('rep')

In [82]:
# Both routes to the mean should agree; uses `a` and `b` from the two preceding cells.
xr.testing.assert_allclose(a, b)

In [69]:
# NOTE(review): `Y` is first assigned further down the notebook (the stack_xr_for_gp call),
# so this cell fails on a fresh top-to-bottom run.
type(Y.indexes['ystack'])

pandas.core.indexes.multi.MultiIndex

In [60]:
# Display the 'mean' entry of the unstacked data, dropping the 'variable' coordinate.
Y_unstack.sel(variable='mean', drop=True)

In [57]:
# Stack (beta, order) into 'xstack' and (pair, position) into 'ystack'; 'variable' is moved last.
# policy='infer' skips the check that each x dimension has a coordinate.
Y = stack_xr_for_gp(Y_unstack, xdims=['beta','order'], ydims=['pair','position'], policy='infer', variable='variable')

In [59]:
# Inspect the indexes of the stacked array.
Y.indexes

xstack: MultiIndex([( 0.1, 0),
                    ( 0.1, 1),
                    ( 0.1, 2),
                    ( 0.1, 3),
                    (10.0, 0),
                    (10.0, 1),
                    (10.0, 2),
                    (10.0, 3)],
                   names=['beta', 'order'])
ystack: MultiIndex([('a',                0.0),
                    ('a', 0.6666666666666666),
                    ('a', 1.3333333333333333),
                    ('a',                2.0),
                    ('b',                0.0),
                    ('b', 0.6666666666666666),
                    ('b', 1.3333333333333333),
                    ('b',                2.0)],
                   names=['pair', 'position'])
variable: Index(['mean', 'var'], dtype='object', name='variable')

In [29]:
# NOTE(review): rebinds `a`, which an earlier cell used for a DataArray; consider a distinct name.
a = list(Y_unstack.dims)
a

['beta', 'variable', 'order', 'pair', 'position']

In [30]:
# NOTE(review): mutates `a` in place, so re-running this cell raises ValueError once 'beta' is gone.
a.remove('beta')

In [39]:
# NOTE(review): leftover IPython help lookup; remove from the finished notebook.
xr.concat?

In [38]:
# Convert the 'xstack' index entries to a 2-D array (one row per stacked (beta, order) entry).
np.array(list(Y.indexes['xstack'].values))

array([[ 0.1,  0. ],
       [ 0.1,  1. ],
       [ 0.1,  2. ],
       [ 0.1,  3. ],
       [10. ,  0. ],
       [10. ,  1. ],
       [10. ,  2. ],
       [10. ,  3. ]])

In [25]:
# NOTE(review): the recorded output is a set, but the visible cells only assign `a` a DataArray
# or a list -- the output is stale or came from a since-deleted cell.
a

{'order', 'pair', 'position', 'variable'}

In [16]:
# Display the stacked array.
Y

In [9]:
# Array of X coordinates from the notebook-local helper.
# NOTE(review): rebinds `X`, which an earlier cell set from xstack.multiindex_to_array.
X = get_x_from_stack(Y)

In [67]:
# Peek at the first group along 'ystack'; each group key unpacks as (pair, position).
# NOTE(review): the recorded output shows pair 0, while the synthetic-data cell labels pairs
# 'a'/'b' -- presumably produced from data without 'pair' coordinates. Re-run to confirm.
for meta, y in Y.groupby('ystack'):
    print('pair: {}, position: {}'.format(*meta))
    break

pair: 0, position: 0.0


In [77]:
# Level names of the stacked X index (the original dimension names).
Y.indexes['xstack'].names

FrozenList(['beta', 'order'])

In [None]:
# NOTE(review): unexecuted scratch cell; the result is not assigned or used anywhere visible.
np.random.rand(100)

In [None]:
def wrap_X_input_array():
    """
    Placeholder: intended to provide an array x with the same shape as target X.

    Not implemented yet; takes no arguments and returns None.
    """
    return None