In [12]:
from common import *

In [None]:
def wmean(a, weights):
    '''Weighted mean taken over axis 0.

    Params - a: either a {observations}x{features} `DataFrame`/`ndarray` or a 1D `ndarray`;
                    if `ndarray`, must not contain NaN
                weights: weights of same shape as a
    Returns the axis-0 sum of `a * weights` divided by the axis-0 sum of the effective weights.'''
    weighted_total = (a * weights).sum(axis=0)
    # `only_nans` is provided by `common`; presumably it is NaN where `a` is NaN and 0 elsewhere,
    # so the weights of missing observations drop out of the denominator -- TODO confirm.
    effective_weights = weights + only_nans(a)
    return weighted_total / effective_weights.sum(axis=0)

# Attach `wmean` to the pandas classes so it can be called as a method, e.g. `df.wmean(weights)`;
# the object itself is then passed as the `a` argument.
pd.DataFrame.wmean = wmean
pd.Series.wmean = wmean

In [2]:
def wvar(a, weights, ddof=0):
    '''Weighted variance taken over axis 0.

    Params - a: either a {observations}x{features} `DataFrame`/`ndarray` or a 1D `ndarray`;
                    if `ndarray`, must not contain NaN
                weights: weights of same shape as a
                ddof: mimics ddof of unweighted stats calculation in scipy
    Every +1 on ddof deducts the mean effective weight from the denominator.'''
    # `only_nans` is provided by `common`; presumably it is NaN where `a` is NaN and 0 elsewhere,
    # which would blank out the weights of missing observations -- TODO confirm.
    effective = weights + only_nans(a)
    total_weight = effective.sum(axis=0)
    center = (a * effective).sum(axis=0) / total_weight
    squared_dev = (a - center) ** 2
    denominator = total_weight - effective.mean(axis=0) * ddof
    return (effective * squared_dev).sum(axis=0) / denominator

# Attach `wvar` to the pandas classes so it can be called as a method, e.g. `df.wvar(weights, ddof=1)`;
# the object itself is then passed as the `a` argument.
pd.DataFrame.wvar = wvar
pd.Series.wvar = wvar

In [None]:
def weighted_percentile_transform(X, weights, range=None):
    '''Weighted percentile transform of every column of `X`.

    Params - X: {observations}x{features} `DataFrame`; it is no longer modified, not even temporarily
                weights: weights that can be added to `X * 0.`; where X is NaN or infinite the
                    resulting weight is NaN
                range: optional (low, high) pair; when truthy the percentiles are multiplied by
                    `high - low` and then shifted down by half of that width
    Returns a `DataFrame` with the index and columns of X.
    Each percentile is the cumulative weight up to the observation (in ascending value order) minus
    half of the observation's own weight, padded by half an average weight, divided by the total
    weight plus one average weight.'''
    W = weights + X * 0.
    # Sort on a copy in which NaN is replaced by +inf so missing values are ordered last.
    # The previous implementation did this replacement in place on the caller's frame and then
    # mapped every +inf back to NaN, which silently destroyed +inf values already present in X.
    X_sortable = X.replace(np.nan, np.inf)
    # `values_sort` is not defined in this file; presumably it returns W's values as an ndarray,
    # reordered column by column into ascending order of the given frame -- TODO confirm.
    Wsort = W.values_sort(X_sortable)
    # argsort of argsort gives each observation's rank, i.e. its row in the sorted layout.
    rev = X_sortable.values.argsort(axis=0).argsort(axis=0)
    wavg, wsum = W.mean(), W.sum()
    Wavg, Wsum = wavg.values[np.newaxis, :], wsum.values[np.newaxis, :]
    pct = (Wavg / 2 + Wsort.cumsum(axis=0) - Wsort / 2) / (Wsum + Wavg)
    # Bring the percentiles back from sorted order to the original row order.
    pct = pct[rev, np.arange(pct.shape[1])]
    if range:
        width = range[1] - range[0]
        pct *= width
        # NOTE(review): this centres the result on 0, which equals [range[0], range[1]] only for
        # symmetric ranges such as (-1, 1); confirm whether `+ range[0]` was intended.
        pct -= width / 2
    return pd.DataFrame(pct, index=X.index, columns=X.columns)

# Attach the transform to `DataFrame` under the short name `wpct`, e.g. `df.wpct(weights)`;
# the frame itself is then passed as the `X` argument.
pd.DataFrame.wpct = weighted_percentile_transform

In [245]:
def wmedian(a, weights, ddof=0):
    '''Weighted median taken over axis 0.

    Params - a: either a {observations}x{features} `DataFrame`/`ndarray` or a 1D `Series`/`ndarray`;
                    NaN in pandas input is treated as missing; a plain `ndarray` must not contain NaN
                weights: weights of same shape as a; a NaN weight in pandas input marks the
                    observation as missing
                ddof: unused; kept so the signature mirrors `wvar`
    Returns, per column, the smallest value whose cumulative weight (in ascending value order)
    reaches half of the total weight of the non-missing observations, or NaN when there is none.
    pandas input gives a result shaped and labelled like `a.min(axis=0)`; array input gives an `ndarray`.'''
    template = None
    if not isinstance(a, np.ndarray):
        # Zero-valued pandas object carrying the labels of the result; added back at the end.
        template = a.min(axis=0) * 0
        a = np.ma.array(a.values, mask=np.isnan(a.values))
    w = weights
    if not isinstance(w, np.ndarray):
        w = np.ma.array(w.values, mask=np.isnan(w.values))

    # An observation is unusable when either its value or its weight is missing. Sharing one mask
    # keeps the weight of a NaN value out of the totals; it used to be counted, which inflated
    # the half-weight threshold and could turn the result into NaN.
    missing = np.ma.getmaskarray(a) | np.ma.getmaskarray(w)
    a = np.ma.array(np.ma.getdata(a), mask=missing)
    w = np.ma.array(np.ma.getdata(w), mask=missing)

    order = a.argsort(axis=0)  # masked (missing) entries are placed last
    picker = (order,) + ((np.arange(a.shape[1]),) if a.ndim > 1 else ())
    sorted_a = a[picker]
    sorted_w = w[picker]

    # Plain-array cumulative weight; missing observations contribute zero.
    cumdist = np.ma.filled(sorted_w, 0).cumsum(axis=0)
    half = cumdist.max(axis=0) / 2

    # Candidates are the non-missing values whose cumulative weight has reached the halfway mark.
    excluded = (cumdist < half) | np.ma.getmaskarray(sorted_a)
    candidates = np.ma.array(np.ma.getdata(sorted_a), mask=excluded)
    median = np.ma.filled(candidates.min(axis=0), np.nan)
    if template is not None:
        return median + template
    return median

In [None]:
def wskew(a, weights, ddof=0):
    '''Weighted skew -- placeholder: nothing is computed yet and the call returns None.

    Params - a: either a {observations}x{features} `DataFrame`/`ndarray` or a 1D `ndarray`;
                    if `ndarray`, must not contain NaN
                weights: weights of same shape as a
                ddof: mimics ddof of unweighted stats calculation in scipy
    Intended contract: every +1 on ddof corresponds to weights.mean() deducted from the denominator.'''
    # TODO: implement; until then all arguments are ignored.
    return None