# Central moments

In [3]:
import cmomy
import cmomy.convert
import numpy as np
import scipy.stats

In [4]:
# Set up some test data.
# Seed the legacy global NumPy RNG so that "Restart & Run All" reproduces
# the same samples (and therefore the same moments) on every run.
np.random.seed(0)
x = np.random.rand(100)

# highest moment order to compute
mom = 4

# raw moments <x**k> for k = 0..mom
raw_mom = (x[:, None] ** np.arange(mom + 1)).mean(0)

# central moments <(x - <x>)**k> for k = 0..mom
# cen_mom = ((x - x.mean())[:, None] ** np.arange(mom+1)).mean(0)
# prefer scipy.stats.moment, as it treats mom=1 correctly
cen_mom = scipy.stats.moment(x, moment=np.arange(mom + 1))

In [11]:
x = np.random.rand(10000)

In [26]:
s = x.sum()

In [30]:
%timeit -n 1 -r 1 np.testing.assert_allclose(s, x.sum())

221 µs ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [35]:
# using cmomy: build a central-moments object from the samples in x,
# up to moment order `mom`
s = cmomy.CentralMoments.from_vals(x, mom=mom)

Note that the first call to a method might be a bit slow.
Behind the scenes, cmomy uses the numba JIT compiler:
the first call to a function compiles it,
and subsequent calls are very fast.

In [36]:
# attributes of s
# s.data -> 'moments data'; print the property's docstring, which
# describes how the moment indices of the array are laid out
help(cmomy.CentralMoments.data)

Help on property:

    accessor to numpy underlying data
    
    By convention data has the following meaning for the moments indexes
    
    * `data[i_0=0,... i_n=0]`, if all moment indices are zero, this is the sum of weights
    * `data[i_0=0,... i_k=1, ... i_n=0]`, if only one moment indice is one and all
    others zero, then this is the average value of the variable with unit index.
    
    * all other cases, the central moments `<(x0-<x0>)**i0 * (x1 - <x1>)**i1 * ...>`



In [37]:
# raw moments from cmomy must agree with the direct NumPy calculation
np.testing.assert_allclose(actual=s.rmom(), desired=raw_mom)

# central moments from cmomy must agree with scipy.stats.moment
np.testing.assert_allclose(actual=s.cmom(), desired=cen_mom)

In [41]:
# other attributes: mean, variance (ddof=0) and total weight
np.testing.assert_allclose(actual=s.mean(), desired=np.mean(x))
np.testing.assert_allclose(actual=s.var(), desired=np.var(x, ddof=0))
np.testing.assert_allclose(actual=s.weight(), desired=x.shape[0])

In [47]:
# Converting between raw and central moments.
# Note that this can run into numerical stability issues.

raw = s.to_raw()
cen = s.data

# NOTE:  raw[i] = weight, i = 0
#               = <x**i>, i > 0

# Round-trip each representation through cmomy.convert
# (imported with the other imports at the top of the notebook).
cen2 = cmomy.convert.to_central_moments(raw)
raw2 = cmomy.convert.to_raw_moments(cen)

np.testing.assert_allclose(cen, cen2)
np.testing.assert_allclose(raw, raw2)




In [62]:
# So what's the point?  The real power is in combining statistics
# using numerically stable methods based on central moments.

# a second, independent batch of samples
x2 = np.random.rand(200)
# both batches pooled into one array
xc = np.concatenate([x, x2])

# reference central moments of the pooled data
cen_mom = scipy.stats.moment(xc, moment=np.arange(mom + 1))


In [65]:
# note, this is just for x2
s2 = cmomy.CentralMoments.from_vals(x2, mom=mom)
# sum the two objects; the result is checked against the moments of the
# concatenated data in the next cell
sc = s + s2

In [68]:
# the summed object must reproduce the central moments of the pooled data
np.testing.assert_allclose(actual=sc.cmom(), desired=cen_mom)

In [71]:
# could also create one object and build from there
sc = cmomy.CentralMoments.from_vals(x, mom=mom)
# add the second batch of samples to the existing object
sc.push_vals(x2)
# same check as before: must match the moments of the concatenated data
np.testing.assert_allclose(sc.cmom(), cen_mom)


In [95]:
# resampling
ndat = len(x)
nrep = 20
# one row of sample indices (drawn with replacement) per replicate
idx = np.random.choice(ndat, size=(nrep, ndat), replace=True)

# scipy puts the moments on the first axis, while cmomy puts them on the
# last axis, so move the first axis to the end
cen_mom = np.moveaxis(
    scipy.stats.moment(x[idx], axis=-1, moment=np.arange(mom + 1)),
    source=0,
    destination=-1,
)

s = cmomy.CentralMoments.from_resample_vals(
    x, indices=idx, mom=mom, resample_kws={"parallel": True}
)
np.testing.assert_allclose(actual=s.cmom(), desired=cen_mom)

In [126]:
# Resampling is much faster for big-ish arrays.
y = np.random.rand(20000)
ndat = y.shape[0]
nrep = 1000

# index table of shape (nrep, ndat), drawn with replacement
idx = np.random.choice(ndat, size=(nrep, ndat), replace=True)

In [127]:
%%timeit -n 1 -r 1
# reference timing: resample by fancy indexing, then compute moments with scipy
cen_mom = scipy.stats.moment(y[idx], axis=-1, moment=np.arange(mom+1))

# Note that scipy puts moments as first axes
# cmomy puts moments in last axes
# so move first to last
cen_mom = np.moveaxis(cen_mom, 0, -1)

672 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [128]:
%%timeit -n 1 -r 1
# same resampling (same y, idx and mom as the scipy cell) done by cmomy
s = cmomy.CentralMoments.from_resample_vals(y, indices=idx, mom=mom, resample_kws=dict(parallel=True))

120 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


## Arbitrary weights

In [130]:
# samples and one random weight per sample
x = np.random.rand(100)
w = np.random.rand(100)

# pass the weights through the `w` argument
s = cmomy.CentralMoments.from_vals(x=x, w=w, mom=mom)

In [134]:
# reference: weighted central moments computed directly with NumPy
weighted_mean = np.average(x, weights=w)
deviations = x[:, None] - weighted_mean
cen_mom = np.average(deviations ** np.arange(mom + 1), weights=w, axis=0)
# set the first central moment to exactly zero
cen_mom[1] = 0

In [136]:
# the weighted NumPy reference must agree with cmomy
np.testing.assert_allclose(actual=cen_mom, desired=s.cmom())

# Arbitrary shapes

In [146]:
x = np.random.rand(100, 20, 3)

# reference central moments along axis 0; scipy returns the moment axis
# first, so move it to the end before comparing with cmomy
cen_mom = np.moveaxis(
    scipy.stats.moment(x, moment=np.arange(mom + 1), axis=0),
    source=0,
    destination=-1,
)

s = cmomy.CentralMoments.from_vals(x, mom=mom)
np.testing.assert_allclose(actual=cen_mom, desired=s.cmom())

In [156]:
# With the moments object in hand we can post-process it,
# e.g. reduce along an axis.
sr = s.reduce(axis=0)

# reference: fold the first two axes of x into a single sample axis
test = np.moveaxis(
    scipy.stats.moment(x.reshape(-1, 3), moment=np.arange(mom + 1), axis=0),
    source=0,
    destination=-1,
)


In [160]:
# the reduced object must match the scipy reference
np.testing.assert_allclose(actual=sr.cmom(), desired=test)

In [181]:
# resample s along axis=0
ndat = s.shape[0]
nrep = 50
# pass `replace` by keyword, consistent with the other resampling cells
idx = np.random.choice(ndat, (nrep, ndat), replace=True)

sr = s.resample_and_reduce(indices=idx, axis=0, resample_kws=dict(parallel=True))

# some nasty rearranging to do the same with scipy.stats.moment:
# index x along its axis 1 with the same indices, then flatten everything
# except the replicate axis and the trailing axis into one sample axis
xx = np.take(x, idx.T, axis=1).reshape(-1, nrep, 3)
test = scipy.stats.moment(xx, moment=np.arange(mom+1), axis=0)
# scipy puts the moment axis first; move it last to match cmomy
test = np.moveaxis(test, 0, -1)

np.testing.assert_allclose(test, sr.cmom())

In [182]:
%%timeit -n 1 -r 1
# also much faster: time the cmomy resample-and-reduce call on its own
sr = s.resample_and_reduce(indices=idx, axis=0, resample_kws=dict(parallel=True))

479 µs ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [183]:
%%timeit -n 1 -r 1
# some nasty rearranging to do the same with scipy.stats.moment
xx = np.take(x, idx.T, axis=1).reshape(-1, nrep, 3)
test = scipy.stats.moment(xx, moment=np.arange(mom+1), axis=0)
# move the moment axis from first to last
test = np.moveaxis(test, 0, -1)

5.27 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


# Wrapping things with xarray

xarray gives a beautiful interface for working with multidimensional data.

In [184]:
import xarray as xr

In [186]:
# highest moment order used in the xarray examples
mom = 4

# raw samples, plus the same samples wrapped with named dimensions;
# 'rec' labels the sample axis
x = np.random.rand(100, 2, 3, 4)
xx = xr.DataArray(x, dims=("rec", "a", "b", "c"))

In [193]:
# same data through both interfaces: the NumPy-backed object takes the
# axis by position, the xarray-backed object takes it by dimension name
s = cmomy.CentralMoments.from_vals(x, mom=mom, axis=0)
sx = cmomy.xCentralMoments.from_vals(xx, mom=mom, axis='rec')

In [194]:
# both objects must hold the same underlying moments data
np.testing.assert_allclose(actual=s.data, desired=sx.data)

In [196]:
# values property gives access to xarray object
sx.values

In [203]:
# do things with xarray
sx.reduce(axis='b')

<CentralMoments(val_shape=(2, 4), mom=(4,))>
<xarray.DataArray (a: 2, c: 4, mom_0: 5)>
array([[[ 3.00000000e+02,  4.96342030e-01,  8.74383561e-02,
         -8.05506472e-04,  1.30870844e-02],
        [ 3.00000000e+02,  5.09432790e-01,  8.51099685e-02,
         -1.52008518e-03,  1.24518930e-02],
        [ 3.00000000e+02,  4.63350459e-01,  7.76084712e-02,
          4.17696248e-03,  1.16270518e-02],
        [ 3.00000000e+02,  5.43134491e-01,  7.70857232e-02,
         -9.42877642e-04,  1.09813226e-02]],

       [[ 3.00000000e+02,  5.35590373e-01,  8.16685376e-02,
         -3.52729098e-03,  1.19769382e-02],
        [ 3.00000000e+02,  5.26272952e-01,  7.82368728e-02,
         -6.22937712e-04,  1.12757812e-02],
        [ 3.00000000e+02,  4.89474416e-01,  8.37243432e-02,
          2.81916286e-03,  1.23038060e-02],
        [ 3.00000000e+02,  4.92180451e-01,  8.73256720e-02,
          1.83216293e-03,  1.35473693e-02]]])
Dimensions without coordinates: a, c, mom_0

In [205]:
# do selection
sx.sel(b=[0,1])

<CentralMoments(val_shape=(2, 2, 4), mom=(4,))>
<xarray.DataArray (a: 2, b: 2, c: 4, mom_0: 5)>
array([[[[ 1.00000000e+02,  5.22928470e-01,  8.10922787e-02,
          -3.87429223e-03,  1.22037638e-02],
         [ 1.00000000e+02,  4.82147560e-01,  9.23895644e-02,
           2.30066514e-03,  1.40901144e-02],
         [ 1.00000000e+02,  4.40126452e-01,  8.72910854e-02,
           1.02323600e-02,  1.49570831e-02],
         [ 1.00000000e+02,  5.47977521e-01,  7.20903556e-02,
          -1.79085618e-04,  9.66249309e-03]],

        [[ 1.00000000e+02,  4.94693838e-01,  8.28014254e-02,
          -1.61248706e-03,  1.18812706e-02],
         [ 1.00000000e+02,  5.24124804e-01,  8.78407943e-02,
          -3.77429004e-03,  1.26239545e-02],
         [ 1.00000000e+02,  4.76841822e-01,  7.42559438e-02,
           3.42372904e-04,  1.00108199e-02],
         [ 1.00000000e+02,  5.56096595e-01,  7.77101129e-02,
          -2.02411446e-03,  1.10893831e-02]]],


       [[[ 1.00000000e+02,  5.64136930e-01,  7.903

# comoments

All the routines work with comoments as well;
just pass `mom=(mom_0, mom_1)`.

In [1]:
# one maximum order per variable: (mom_0, mom_1)
mom = (3, 3)

In [2]:
# two variables and one weight per sample
x = np.random.rand(100)
y = np.random.rand(100)
w = np.random.rand(100)

# NOTE: for co moments, pass in two variables as tuple (x, y)

s = cmomy.CentralMoments.from_vals((x, y), w=w, mom=mom)

NameError: name 'np' is not defined

In [239]:
# smart broadcasting
x = np.random.rand(100, 2, 3)
y = np.random.rand(100)

# passing the mismatched shapes directly would throw an error:
# s = cmomy.CentralMoments.from_vals((x, y), mom=mom)
# so either broadcast y against x by hand ...
yy = np.broadcast_to(y[:, np.newaxis, np.newaxis], shape=x.shape)
s0 = cmomy.CentralMoments.from_vals((x, yy), mom=mom)

# ... or just let cmomy do the broadcasting
s1 = cmomy.CentralMoments.from_vals((x, y), mom=mom, broadcast=True)

np.testing.assert_allclose(actual=s0.data, desired=s1.data)

In [249]:
# super convenient to use xarray for this

xx = xr.DataArray(x, dims=['rec','a','b'])
yy = xr.DataArray(y, dims=['rec'])

# suppose things are in the wrong order
xx = xx.transpose('a','b','rec')

sx = cmomy.xCentralMoments.from_vals((xx, yy), 
                                     mom=mom, 
                                     axis='rec', 
                                     # control moment dim names
                                     mom_dims=['mom_x','mom_y'],
                                     broadcast=True)



In [246]:
# the xarray-backed result must match the NumPy-backed one
np.testing.assert_allclose(actual=sx.data, desired=s0.data)

In [251]:
# note, can also just pass plain arrays to xCentralMoments;
# dimension names are then supplied explicitly
sx1 = cmomy.xCentralMoments.from_vals((x,y), mom=mom, axis=0, 
                                      # dims of "value" part of arrays
                                      # i.e., x less reduced dimension
                                      dims=['a','b'],
                                      mom_dims=['mom_x','mom_y'],
                                      broadcast=True
                                     )

In [253]:
# both construction routes must give the same xarray object (within tolerance)
xr.testing.assert_allclose(sx.values, sx1.values)

In [255]:
# and of course, can do whatever now
sx.resample_and_reduce(nrep=20, axis='b')

<CentralMoments(val_shape=(20, 2), mom=(3, 3))>
<xarray.DataArray (rep: 20, a: 2, mom_x: 4, mom_y: 4)>
array([[[[ 3.00000000e+02,  5.32398835e-01,  9.67907858e-02,
          -6.26704586e-03],
         [ 5.17548149e-01,  7.45814115e-03, -6.83581433e-04,
           7.18028882e-04],
         [ 8.76377665e-02, -5.75884709e-04,  8.10572069e-03,
          -5.90222021e-04],
         [-1.14926653e-03,  2.00187872e-03, -2.50679327e-04,
           2.61460928e-04]],

        [[ 3.00000000e+02,  5.32398835e-01,  9.67907858e-02,
          -6.26704586e-03],
         [ 5.34106505e-01,  3.32911820e-03,  9.84209780e-04,
           6.53252757e-04],
         [ 8.36672040e-02, -6.17310471e-04,  8.34781180e-03,
          -7.48160727e-04],
         [-5.88788041e-03,  2.84477843e-04, -4.69525814e-04,
           1.32618740e-04]]],


       [[[ 3.00000000e+02,  5.32398835e-01,  9.67907858e-02,
...
           1.32618740e-04]]],


       [[[ 3.00000000e+02,  5.32398835e-01,  9.67907858e-02,
          -6.26704586