In [42]:
import pandas as pd
import xarray as xr
import numpy as np

from __future__ import print_function
import pavey

In [31]:
# random groupings
def build_random_sample(X,samp_axis,min_chunk=4,max_chunk=20,shuffle=True):

    nsamp = X.shape[samp_axis]
    
    idx = np.arange(nsamp)
    if shuffle:
        np.random.shuffle(idx)
    
    
        
    i0=0
    i1=np.random.randint(min_chunk,max_chunk)
    XA = []
    while True:
        XA.append(np.take(X,idx[i0:i1],axis=samp_axis))
        if i1==nsamp:
            break
        i0 = i1
        i1 += np.random.randint(min_chunk,max_chunk)
        i1 = min(nsamp,i1)
        
    return XA

In [186]:
# base data
axis = 0

v_np = np.random.rand(100,4)
v_xr = xr.DataArray(v_np,dims=('x','y'))
v_pd = v_xr.to_dataframe(name='v')['v']


sample = build_random_sample(v_np, axis)
ave = np.array([x.mean(axis) for x in sample])
var = np.array([x.var(axis) for x in sample])
wt = np.array([x.shape[axis] for x in sample])

In [214]:
X = v_np.T
X -= X.mean(1)[:,None]

In [240]:
dims = list(v_xr.dims)
_ = dims.remove('x')
r0 = dict(zip(dims, map(lambda x: x + '_0', dims)))
r1 = dict(zip(dims, map(lambda x: x + '_1', dims)))



In [250]:
x0 = v_xr.rename(r0)
x1 = v_xr.rename(r1)

In [256]:
x0.dot(x1)

<xarray.DataArray (y_0: 4, y_1: 4)>
array([[ 8.13463 ,  0.657419,  1.136866, -1.074879],
       [ 0.657419,  7.909767,  1.075082,  0.905915],
       [ 1.136866,  1.075082,  8.618421, -0.346577],
       [-1.074879,  0.905915, -0.346577,  8.255462]])
Dimensions without coordinates: y_0, y_1

In [257]:
(x0 * x1).mean('x')

<xarray.DataArray (y_0: 4, y_1: 4)>
array([[ 0.081346,  0.006574,  0.011369, -0.010749],
       [ 0.006574,  0.079098,  0.010751,  0.009059],
       [ 0.011369,  0.010751,  0.086184, -0.003466],
       [-0.010749,  0.009059, -0.003466,  0.082555]])
Dimensions without coordinates: y_0, y_1

In [244]:
v_xr.dot(v_xr)

<xarray.DataArray ()>
array(32.91828003665449)

In [238]:
v_xr.rename(r0) * v_xr.rename

<xarray.DataArray (x_0: 100, y_0: 4)>
array([[-0.457548, -0.243828,  0.130556, -0.016882],
       [-0.221269,  0.150712, -0.524698,  0.499147],
       [-0.250976,  0.085459,  0.295354, -0.236381],
       ..., 
       [-0.153244, -0.443865, -0.133151, -0.324366],
       [-0.319026, -0.402579, -0.209883,  0.246809],
       [-0.396466, -0.161654, -0.203841,  0.374956]])
Dimensions without coordinates: x_0, y_0

In [234]:
dims1


NameError: name 'dims1' is not defined

In [221]:
ddof=0
fact = X.shape[1] - ddof
np.dot(X,X.T.conj()) / fact

array([[ 0.0813463 ,  0.00657419,  0.01136866, -0.01074879],
       [ 0.00657419,  0.07909767,  0.01075082,  0.00905915],
       [ 0.01136866,  0.01075082,  0.08618421, -0.00346577],
       [-0.01074879,  0.00905915, -0.00346577,  0.08255462]])

In [201]:
np.cov(v_np.T,ddof=0)

array([[ 0.0813463 ,  0.00657419,  0.01136866, -0.01074879],
       [ 0.00657419,  0.07909767,  0.01075082,  0.00905915],
       [ 0.01136866,  0.01075082,  0.08618421, -0.00346577],
       [-0.01074879,  0.00905915, -0.00346577,  0.08255462]])

In [202]:
pavey.cov_nd(v_np, axis=0, ddof=0)

array([[ 0.0813463 ,  0.00657419,  0.01136866, -0.01074879],
       [ 0.00657419,  0.07909767,  0.01075082,  0.00905915],
       [ 0.01136866,  0.01075082,  0.08618421, -0.00346577],
       [-0.01074879,  0.00905915, -0.00346577,  0.08255462]])

In [203]:
v_np.var(0)

array([ 0.0813463 ,  0.07909767,  0.08618421,  0.08255462])

In [215]:
%psource np.cov

In [109]:
print('mean is:',v_np.mean(axis))
print('var  is:',v_np.var(axis))

mean is: [ 0.47711983  0.49040939  0.47524581  0.45797249]
var  is: [ 0.08745301  0.08211158  0.08107752  0.0891661 ]


In [112]:
r = pavey.RunningStatsVec.from_stats(wt, ave, var,axis=0)

In [115]:
print('weighted mean is:', r.mean())
print('weighted var  is:', r.var())
print('weighted ')

weighted mean is: [ 0.47711983  0.49040939  0.47524581  0.45797249]
weighted var  is: [ 0.08745301  0.08211158  0.08107752  0.0891661 ]
weighted 


# RunningStats works in a variety of ways:

# RunningStats List

In [48]:
r = pavey.RunningStatsList.from_stats(w=wt, a=ave, var=var)

In [86]:
rm = r.combine(block_size=None)

In [91]:
print('mean is:',rm.mean())
print('var  is:',rm.var())

mean is: [ 0.47368372]
var  is: [ 0.09024512]


# nan?

In [177]:
n = 100
v_np = np.random.rand(n,4)

msk = (np.random.rand(n) > 0.8)
v_np[msk] = np.nan


v_xr = xr.DataArray(v_np,coords={'x':np.arange(n),'y':np.arange(4)},dims=('x','y'))
v_pd = v_xr.to_dataframe(name='v')['v']
#v_pd = pd.Series(v_np)

In [145]:
import bottleneck

In [146]:
print(v_np.mean())
print(np.nanmean(v_np))
print(bottleneck.nanmean(v_np))


nan
0.484358808651
0.484358808651


In [169]:
v_xr

<xarray.DataArray (x: 100, y: 4)>
array([[      nan,       nan,       nan,       nan],
       [ 0.850103,  0.68067 ,  0.132077,  0.770434],
       [ 0.175458,  0.596899,  0.28303 ,  0.852245],
       ..., 
       [ 0.638981,  0.626607,  0.635223,  0.071265],
       [ 0.777563,  0.079153,  0.255498,  0.392216],
       [ 0.351051,  0.547302,  0.606576,  0.980819]])
Dimensions without coordinates: x, y

In [159]:
v_pd.groupby('y').mean()

y
0    0.523091
1    0.441723
2    0.433260
3    0.539361
Name: v, dtype: float64

In [168]:
np.zeros_like(v_xr) + v_xr

<xarray.DataArray (x: 100, y: 4)>
array([[      nan,       nan,       nan,       nan],
       [ 0.850103,  0.68067 ,  0.132077,  0.770434],
       [ 0.175458,  0.596899,  0.28303 ,  0.852245],
       ..., 
       [ 0.638981,  0.626607,  0.635223,  0.071265],
       [ 0.777563,  0.079153,  0.255498,  0.392216],
       [ 0.351051,  0.547302,  0.606576,  0.980819]])
Dimensions without coordinates: x, y

In [160]:
v_xr.mean(['x'])

<xarray.DataArray (y: 4)>
array([ 0.523091,  0.441723,  0.43326 ,  0.539361])
Dimensions without coordinates: y

In [125]:
v_xr.mean('x')

<xarray.DataArray ()>
array(0.5158197323572882)