In [2]:
import numpy as np
import pandas as pd
from numpy import abs
from numpy import log
from numpy import sign
from scipy.stats import rankdata

<h3>估计滚动最小值函数ts_min()实例

In [23]:
def ts_min(df, window=10):
    return df.rolling(window).min()

data = {'Date': ['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05'],
        'Value1': [10, -15, 7, 20, -5],
        'Value2': [0.5, 0.2, -0.1, 0.3, 0.8]
        }

df = pd.DataFrame(data)

# 将日期列转换为日期时间类型
df['Date'] = pd.to_datetime(df['Date'])

# 将日期列设置为索引
df.set_index('Date', inplace=True)

rolling_min = ts_min(df, window=2)

print(df,"\n")

print(rolling_min)

            Value1  Value2
Date                      
2023-01-01      10     0.5
2023-01-02     -15     0.2
2023-01-03       7    -0.1
2023-01-04      20     0.3
2023-01-05      -5     0.8 

            Value1  Value2
Date                      
2023-01-01     NaN     NaN
2023-01-02   -15.0     0.2
2023-01-03   -15.0    -0.1
2023-01-04     7.0    -0.1
2023-01-05    -5.0     0.3


<h3>估计滚动最大值函数ts_max()实例

In [24]:
def ts_max(df, window=10):
    return df.rolling(window).max()

rolling_max = ts_max(df, window=3)

print(rolling_max)

            Value1  Value2
Date                      
2023-01-01     NaN     NaN
2023-01-02     NaN     NaN
2023-01-03    10.0     0.5
2023-01-04    20.0     0.3
2023-01-05    20.0     0.8


<h3>估计差值 delta()

In [25]:
def delta(df, period=1):
    return df.diff(period)

print(delta(df,period=2))

            Value1  Value2
Date                      
2023-01-01     NaN     NaN
2023-01-02     NaN     NaN
2023-01-03    -3.0    -0.6
2023-01-04    35.0     0.1
2023-01-05   -12.0     0.9


<h3>估计滞后 delay()

In [26]:
def delay(df, period=1):
    return df.shift(period)

print(delay(df,period=2))

            Value1  Value2
Date                      
2023-01-01     NaN     NaN
2023-01-02     NaN     NaN
2023-01-03    10.0     0.5
2023-01-04   -15.0     0.2
2023-01-05     7.0    -0.1


### 横截面等级 rank()

注意此函数求得【列】方向上的排名 ，详见下例

In [27]:
def rank(df):
    return df.rank(pct=True)

print(rank(df))

            Value1  Value2
Date                      
2023-01-01     0.8     0.8
2023-01-02     0.2     0.4
2023-01-03     0.6     0.2
2023-01-04     1.0     0.6
2023-01-05     0.4     1.0


### 缩放时间序列 scale()

In [28]:
def scale(df, k=1):
    return df.mul(k).div(np.abs(df).sum())

print(scale(df,k=50))

               Value1     Value2
Date                            
2023-01-01   8.771930  13.157895
2023-01-02 -13.157895   5.263158
2023-01-03   6.140351  -2.631579
2023-01-04  17.543860   7.894737
2023-01-05  -4.385965  21.052632


### 估计 ts_max(df, window) 发生的对应的窗口内的位置(日期) te_argmax()

In [29]:
def ts_argmax(df, window=10):
    return df.rolling(window).apply(np.argmax) + 1

print(ts_argmax(df,window=3))

            Value1  Value2
Date                      
2023-01-01     NaN     NaN
2023-01-02     NaN     NaN
2023-01-03     1.0     1.0
2023-01-04     3.0     3.0
2023-01-05     2.0     3.0


### 估计 ts_min(df, window) 发生的对应的窗口内的位置(日期) te_argmin()

In [31]:
def ts_argmin(df, window=10):
    return df.rolling(window).apply(np.argmin) + 1

print(ts_argmin(df,window=3))

            Value1  Value2
Date                      
2023-01-01     NaN     NaN
2023-01-02     NaN     NaN
2023-01-03     2.0     3.0
2023-01-04     1.0     2.0
2023-01-05     3.0     1.0


### 实现线性加权移动平均 decay_linear()

In [6]:
data = {'Date': ['2023-01-01', '2023-01-02', '2023-01-03', '2023-01-04', '2023-01-05'],
        'Value1': [10, -15, 7, 20, -5]
        }

df = pd.DataFrame(data)

# 将日期列转换为日期时间类型
df['Date'] = pd.to_datetime(df['Date'])

# 将日期列设置为索引
df.set_index('Date', inplace=True)

def decay_linear(df, period=10):
    # 清洗数据
    if df.isnull().values.any():
        df.fillna(method='ffill', inplace=True)
        df.fillna(method='bfill', inplace=True)
        df.fillna(value=0, inplace=True)
    na_lwma = np.zeros_like(df)
    na_lwma[:period, :] = df.iloc[:period, :] 
    na_series = df.to_numpy()
    print("na_series:\n",na_series)

    divisor = period * (period + 1) / 2
    y = (np.arange(period) + 1) * 1.0 / divisor# y = [1/3, 2/3]
    print("y:",y)
    # 用实际收盘价估算实际lwma
    # 确保回测引擎不受过度拟合影响
    for row in range(period - 1, df.shape[0]):
        x = na_series[row - period + 1: row + 1, :] # x = [10, -15]
        print("x:\n",x)
        na_lwma[row, :] = (np.dot(x.T, y))
        print("na_lwma[row, :]",na_lwma[row, :])
    return pd.DataFrame(na_lwma, index=df.index, columns=['CLOSE'])

print(decay_linear(df,period=2))

na_series:
 [[ 10]
 [-15]
 [  7]
 [ 20]
 [ -5]]
y: [0.33333333 0.66666667]
x:
 [[ 10]
 [-15]]
na_lwma[row, :] [-6]
x:
 [[-15]
 [  7]]
na_lwma[row, :] [0]
x:
 [[ 7]
 [20]]
na_lwma[row, :] [15]
x:
 [[20]
 [-5]]
na_lwma[row, :] [3]
            CLOSE
Date             
2023-01-01     10
2023-01-02     -6
2023-01-03      0
2023-01-04     15
2023-01-05      3
