In [37]:
from datetime import datetime
from functools import lru_cache

import numpy as np
import pandas as pd
import plotly.express as px
from tqdm import tqdm

from gplearn.genetic import SymbolicRegressor

from vnpy.trader.database import get_database
from vnpy.trader.constant import Interval, Exchange

# 数据处理

In [None]:
db = get_database()

In [None]:
bars = db.load_bar_data(
    symbol="i888",
    exchange=Exchange.DCE,
    interval=Interval.MINUTE,
    start=datetime(2010, 1, 1),
    end=datetime.now()
)

In [None]:
df = pd.DataFrame.from_dict(b.__dict__ for b in bars)
df.head()

In [None]:
new_df = pd.DataFrame({
    "datetime": df["datetime"],
    "open_price": df["open_price"],
    "high_price": df["high_price"],
    "low_price": df["low_price"],
    "close_price": df["close_price"],
    "volume": df["volume"],
    "turnover": df["turnover"],
    "open_interest": df["open_interest"],
})

In [None]:
new_df.to_feather("i888.fth")

# 遍历回测

In [17]:
df = pd.read_feather("i888.fth")
df.set_index("datetime", inplace=True)

In [18]:
df = df[df.index > "2022-1-1"]

In [20]:
df.reset_index().to_feather("data.fth")

In [4]:
df["fast_ma"] = df["close_price"].rolling(20).mean()
df["slow_ma"] = df["close_price"].rolling(100).mean()
df["signal"] = (df["fast_ma"] - df["slow_ma"]).fillna(0)

In [42]:
def run_backtesting(
    df: pd.DataFrame,
    window: int = 10000,
    tp_percent: float = 0.05,
    sl_percent: float = 0.05,
    quantile: float = 0.2,
    capital: int = 1_000_000,
    commission: float = 3 / 10000,
    show_progress: bool = False
) -> None:
    """执行回测任务"""
    # 滚动窗口计算分位数
    df["long_entry"] = df["signal"].rolling(window).quantile(1 - quantile)
    df["short_entry"] = df["signal"].rolling(window).quantile(quantile)

    # 初始化状态变量
    pos = 0         # 持仓
    fee = 0         # 交易成本
    long_sl = 0     # 多头止损
    long_tp = 0     # 多头止盈
    short_sl = 0    # 空头止损
    short_tp = 0    # 空头止盈
    result = {}     # 持仓结果字典

    # 遍历计算仓位
    it = df.rolling(window)
    if show_progress:
        it = tqdm(it, total=(len(df)))

    for v in it:
        # 检查数据长度
        if len(v.index) < window:
            continue

        # 提取数据
        row = v.iloc[-1, :]

        last_ix = row.name
        last_signal = row.signal
        last_price = row.close_price
        long_entry = row.long_entry
        short_entry = row.short_entry

        # 记录当前仓位
        result[last_ix] = pos

        # 重置交易成本
        fee = 0

        # 判断开仓
        if not pos:
            if last_signal >= long_entry:
                pos = int(round(capital / last_price))
                fee = abs(pos * last_price * commission)

                long_sl = last_price * (1 - sl_percent)
                long_tp = last_price * (1 + tp_percent)
            elif last_signal <= short_entry:
                pos = -int(round(capital / last_price))
                fee = pos * last_price * commission

                short_sl = last_price * (1 + sl_percent)
                short_tp = last_price * (1 - tp_percent)
        # 多头平仓
        elif pos > 0:
            if last_price >= long_tp or last_price <= long_sl:
                fee = abs(pos * last_price * commission)

                pos = 0
                long_sl = 0
                long_tp = 0
        # 空头平仓
        elif pos < 0:
            if last_price <= short_tp or last_price >= short_sl:
                fee = abs(pos * last_price * commission)

                pos = 0
                short_sl = 0
                short_tp = 0                
    
    # 统计盈亏结果
    df["pos"] = pd.Series(result)
    df["change"] = (df["close_price"] - df["close_price"].shift(1)).fillna(0)
    df["trade"] = (df["pos"] - df["pos"].shift(1)).fillna(0)
    df["fee"] = abs(df["trade"] * df["close_price"] * commission)
    df["pnl"] = df["change"] * df["pos"] - df["fee"]

    df["signal_nav"] = df["pnl"].cumsum() / capital + 1
    df["index_nav"] = df["close_price"] / df["close_price"].iat[0]

    return df

In [43]:
def calculate_sharpe(nav: pd.Series) -> float:
    """计算夏普比率"""
    r = nav.pct_change()
    return r.mean() / r.std()

In [44]:
df = run_backtesting(df)

KeyError: 'signal'

In [8]:
calculate_sharpe(df["signal_nav"])

0.003196011545968546

In [25]:
@lru_cache
def load_df() -> pd.DataFrame:
    """读取DataFrame"""
    return pd.read_feather("data.fth")

In [45]:
def _fitness(
    y: np.ndarray,          # 收盘价序列
    y_pred: np.ndarray,     # 信号值序列
    w: np.ndarray
) -> float:
    """计算适应度"""
    # 读取DataFrame
    df: pd.DataFrame = load_df()

    # 检查y数据长度
    if len(y) < len(df):
        return 0

    # 插入信号列
    df["signal"] = y

    # 执行回测
    df = run_backtesting(df)

    # 计算适应度
    sharpe: float = calculate_sharpe(df["signal_nav"])

    return sharpe

In [46]:
from gplearn.fitness import make_fitness

my_fitness = make_fitness(function=_fitness, greater_is_better=True)


In [47]:
load_df.cache_clear()
df = load_df()
df = df.drop(["datetime"], axis=1)
x_train = df.to_numpy()
y_train = df["close_price"].to_numpy()

In [48]:
est_gp = SymbolicRegressor(
    population_size=10,
    generations=20,
    stopping_criteria=0.01,
    p_crossover=0.7,
    p_subtree_mutation=0.1,
    p_hoist_mutation=0.05,
    p_point_mutation=0.1,
    max_samples=0.9,
    verbose=1,
    parsimony_coefficient=0.01,
    random_state=0,
    metric=my_fitness
)
est_gp.fit(x_train, y_train)

    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0    25.00     -0.000966773        9     -0.000966773     -0.000966773     63.38m
   1     6.00     -0.000966773        7     -0.000966773     -0.000966773     53.24m
   2     1.80     -0.000966773        3     -0.000966773     -0.000966773     48.96m
   3     1.00     -0.000966773        1     -0.000966773     -0.000966773     46.00m
   4     1.20     -0.000966773        1     -0.000966773     -0.000966773     42.75m
   5     1.60     -0.000966773        1     -0.000966773     -0.000966773     40.05m
   6     1.20     -0.000966773        1     -0.000966773     -0.000966773     37.07m
   7     1.00     -0.000966773        1     -0.000966773     -0.000966773     34.00m
   8     1.00     -0.000966773        1     -0.000966773     -0.000966773  