In [1]:
import unittest
from np_base_func import *
import numpy as np
import pandas as pd
import numpy.testing as npt

In [2]:
from tqdm import tqdm

In [3]:
# Read the price CSV and use its "TrdDate" column as the row index.
df = pd.read_csv("./test/WindPrices.adjclose.csv").set_index("TrdDate")
# Cast the index labels to strings before parsing them as datetimes
# (presumably the dates are stored as integers such as 20200102 — TODO confirm).
df.index = pd.to_datetime(df.index.astype('str'))

In [4]:
#nan_columns = df.columns[df.isna().all()]
#df = df.drop(columns=nan_columns)

In [5]:
# Stack Overflow thread on asserting NumPy array equality in tests:
# https://stackoverflow.com/questions/3302949/best-way-to-assert-for-numpy-array-equality

In [6]:
# Copy of the price table with every missing value replaced by 0; this is the
# frame handed to the comparison helpers below.
df_0_na = df.fillna(0)

In [7]:
def test_equal1(func1, func2, df_0_na):
    for i in range(100):
        code_index = np.random.randint(0, len(df_0_na.columns))
        code = df_0_na.columns[code_index]
        print("testing: ", code)
        npt.assert_array_almost_equal(func1(df_0_na[code]), func2(df_0_na[code]))

def test_equal2(func1, func2, df_0_na):
    for i in range(100):
        code_index = np.random.randint(0, len(df_0_na.columns))
        code = df_0_na.columns[code_index]
        window = np.random.randint(len(df_0_na))
        try:
            npt.assert_array_almost_equal(func1(df_0_na[code], window), func2(df_0_na[code], window), decimal=5)
        except:
            print(code, window)

def test_equal3(func1, func2, df_0_na):
    for i in range(100):
        code_index1 = np.random.randint(0, len(df_0_na.columns))
        code_index2 = np.random.randint(0, len(df_0_na.columns))
        code1 = df_0_na.columns[code_index1]
        code2 = df_0_na.columns[code_index2]
        window = np.random.randint(len(df_0_na))
        x1 = func1(df_0_na[code1], df_0_na[code2], window)
        x2 = func2(df_0_na[code1], df_0_na[code2], window)
        
        #npt.assert_array_almost_equal(x1, x2)
        npt.assert_array_almost_equal(x1, x2, decimal=5)

In [8]:
# pandas versions of the functions (the reference results the NumPy functions are compared against)
# region Auxiliary functions
  
import numpy as np
import pandas as pd
from numpy import abs
from numpy import log
from numpy import sign
from scipy.stats import rankdata
def pd_ts_sum(df, window=10):
    """
    Rolling sum computed with pandas.
    :param df: a pandas Series or DataFrame.
    :param window: the rolling window.
    :return: a pandas object of the same shape holding the sum over the past 'window' rows.
    """
    rolling_view = df.rolling(window)
    return rolling_view.sum()

def pd_sma(df, window=10):
    """
    Simple moving average computed with pandas.
    :param df: a pandas Series or DataFrame.
    :param window: the rolling window.
    :return: a pandas object of the same shape holding the mean over the past 'window' rows.
    """
    rolling_view = df.rolling(window)
    return rolling_view.mean()

def pd_stddev(df, window=10):
    """
    Rolling standard deviation computed with pandas.
    :param df: a pandas Series or DataFrame.
    :param window: the rolling window.
    :return: a pandas object of the same shape holding the standard deviation
             over the past 'window' rows (pandas' default degrees of freedom).
    """
    rolling_view = df.rolling(window)
    return rolling_view.std()

def pd_correlation(x, y, window=10):
    """
    Rolling correlation between two series, computed with pandas.
    :param x: a pandas Series (or DataFrame); the rolling window is built on it.
    :param y: the pandas object to correlate with 'x'.
    :param window: the rolling window.
    :return: a pandas object holding the correlation of 'x' and 'y' over the past 'window' rows.
    """
    x_windows = x.rolling(window)
    return x_windows.corr(y)

def pd_covariance(x, y, window=10):
    """
    Rolling covariance between two series, computed with pandas.
    :param x: a pandas Series (or DataFrame); the rolling window is built on it.
    :param y: the pandas object to take the covariance with.
    :param window: the rolling window.
    :return: a pandas object holding the covariance of 'x' and 'y' over the past 'window' rows.
    """
    x_windows = x.rolling(window)
    return x_windows.cov(y)

def pd_rolling_rank(na):
    """
    Auxiliary function to be used with rolling(...).apply
    :param na: array-like holding the values of one window.
    :return: The rank (as computed by scipy's rankdata) of the last value in the array.
    """
    return rankdata(na)[-1]

def pd_ts_rank(df, window=10):
    """
    Wrapper function to estimate rolling rank.
    :param df: a pandas Series or DataFrame.
    :param window: the rolling window.
    :return: a pandas object with the rank of each row's value among the past 'window' rows.
    """
    # Use the helper defined in this notebook. The previous reference to
    # `rolling_rank` named nothing defined here and could only resolve through
    # the star import from the module under test.
    return df.rolling(window).apply(pd_rolling_rank)

def rolling_prod(na):
    """
    Auxiliary function to be used with rolling(...).apply
    :param na: array-like holding the values of one window.
    :return: The product of all values in the array.
    """
    product = np.prod(na)
    return product

def pd_product(df, window=10):
    """
    Rolling product computed with pandas.
    :param df: a pandas Series or DataFrame.
    :param window: the rolling window.
    :return: a pandas object of the same shape holding the product over the past 'window' rows.
    """
    rolling_view = df.rolling(window)
    # rolling_prod multiplies together the values of each window.
    return rolling_view.apply(rolling_prod)

def pd_ts_min(df, window=10):  # minimum inside the window; ts_argmin instead gives that minimum's position in the window
    """
    Rolling minimum computed with pandas.
    :param df: a pandas Series or DataFrame.
    :param window: the rolling window.
    :return: a pandas object of the same shape holding the minimum over the past 'window' rows.
    """
    rolling_view = df.rolling(window)
    return rolling_view.min()

def pd_ts_max(df, window=10):
    """
    Rolling maximum computed with pandas.
    :param df: a pandas Series or DataFrame.
    :param window: the rolling window.
    :return: a pandas object of the same shape holding the maximum over the past 'window' rows.
    """
    rolling_view = df.rolling(window)
    return rolling_view.max()

def pd_delta(df, period=1):
    """
    Difference between each row and the row 'period' steps earlier.
    :param df: a pandas Series or DataFrame.
    :param period: how many rows back to subtract.
    :return: a pandas object with the current value minus the value 'period' rows ago.
    """
    differenced = df.diff(period)
    return differenced

def pd_delay(df, period=1):
    """
    Lag a series by shifting it 'period' rows.
    :param df: a pandas Series or DataFrame.
    :param period: number of rows to shift by.
    :return: a pandas object with the lagged values.
    """
    lagged = df.shift(period)
    return lagged

def pd_rank(df):
    """
    Percentile rank computed with pandas.

    Ranks with pandas' default axis, i.e. each value is ranked against the
    other rows of its own column. The cross-sectional variant (ranking across
    columns with axis=1) is kept below as a comment only.
    :param df: a pandas Series or DataFrame.
    :return: a pandas object of the same shape holding percentile ranks.
    """
    # Cross-sectional alternative: df.rank(axis=1, pct=True)
    percentile_ranks = df.rank(pct=True)
    return percentile_ranks

def pd_scale(df, k=1):
    """
    Rescale a series by its total absolute value.
    :param df: a pandas Series or DataFrame.
    :param k: scaling factor.
    :return: 'df' multiplied by 'k' and divided by sum(abs(df)), so that the
             absolute values of the result sum to abs(k).
    """
    abs_total = np.abs(df).sum()
    scaled = (df * k) / abs_total
    return scaled

def pd_ts_argmax(df, window=10):
    """
    Position of the rolling maximum inside each window, computed with pandas.
    :param df: a pandas Series or DataFrame.
    :param window: the rolling window.
    :return: a pandas object holding, for each row, the result of np.argmax
             applied to the past 'window' rows.
    """
    rolling_view = df.rolling(window)
    return rolling_view.apply(np.argmax)

def pd_ts_argmin(df, window=10):  # which element of the window holds the window's minimum
    """
    Position of the rolling minimum inside each window, computed with pandas.
    :param df: a pandas Series or DataFrame.
    :param window: the rolling window.
    :return: a pandas object holding, for each row, the result of np.argmin
             applied to the past 'window' rows.
    """
    rolling_view = df.rolling(window)
    return rolling_view.apply(np.argmin)

In [12]:
# Compare the functions under test against the pandas wrappers defined above.
# ts_sum / delay / delta / ts_covariance are not defined in this notebook;
# presumably they come from `from np_base_func import *` — TODO confirm.
# test_equal2 prints the (column, window) of failing trials instead of raising.
test_equal2(ts_sum, pd_ts_sum, df_0_na)
# For delay and delta the randomly drawn "window" is passed as the period.
test_equal2(delay, pd_delay, df_0_na)
test_equal2(delta, pd_delta, df_0_na)
# test_equal3 asserts directly, so the first mismatch raises AssertionError.
test_equal3(ts_covariance, pd_covariance, df_0_na)

In [24]:
def test_correlation(func1, func2, df_0_na):
    for i in range(100):
        code_index1 = np.random.randint(0, len(df_0_na.columns))
        code_index2 = np.random.randint(0, len(df_0_na.columns))
        code1 = df_0_na.columns[code_index1]
        code2 = df_0_na.columns[code_index2]
        window = np.random.randint(len(df_0_na))
        x1 = func1(df_0_na[code1], df_0_na[code2], window)
        x2 = func2(df_0_na[code1], df_0_na[code2], window)
        #对于pandas有些结果会是inf，而numpy我的处理是返回nan，此时不应该认为不对
        x1 = np.nan_to_num(x1, nan=np.nan, posinf=np.nan, neginf=np.nan)
        x2 = np.nan_to_num(x2, nan=np.nan, posinf=np.nan, neginf=np.nan)
        
        x1 = np.where(np.abs(x1)<0.000001, np.nan, x1)
        x2 = np.where(np.abs(x2)<0.000001, np.nan, x2)
        
        #npt.assert_array_almost_equal(x1, x2)
        
        try:
            npt.assert_array_almost_equal(x1, 
                                          x2,
                                         decimal=5)
        except:
            print(code1, code2, window)

In [25]:
# Rolling correlation: ts_correlation (not defined in this notebook, presumably
# from np_base_func — TODO confirm) vs the pandas wrapper.
test_correlation(ts_correlation, pd_correlation, df_0_na)

In [16]:
# test_equal2 passes its random "window" as the second argument, which is the
# scaling factor k of pd_scale here.
test_equal2(scale, pd_scale, df_0_na)

In [17]:
# Rolling mean: ts_sma vs pandas rolling(window).mean().
test_equal2(ts_sma, pd_sma, df_0_na)

In [27]:
# Rolling standard deviation: ts_stddev vs pandas rolling(window).std().
test_equal2(ts_stddev, pd_stddev, df_0_na)

In [None]:
def test_ts_product():
    """
    Placeholder for the rolling-product comparison; not implemented yet.

    TODO: once the window exceeds a certain size the product overflows, so a
    check for that case should be added here.
    """
    return None

In [49]:
# Rolling minimum: ts_min vs pandas rolling(window).min().
test_equal2(ts_min, pd_ts_min, df_0_na)

In [50]:
# Rolling maximum: ts_max vs pandas rolling(window).max().
test_equal2(ts_max, pd_ts_max, df_0_na)

In [55]:
# Position of the rolling minimum: ts_argmin vs pandas rolling(window).apply(np.argmin).
test_equal2(ts_argmin, pd_ts_argmin, df_0_na)

In [11]:
# Position of the rolling maximum: ts_argmax vs pandas rolling(window).apply(np.argmax).
test_equal2(ts_argmax, pd_ts_argmax, df_0_na)