In [2]:
import pandas as pd
from qlib.data import D
import qlib

In [3]:
# Location of the qlib dataset. The QLIB_PROVIDER_URI environment variable
# overrides it so the notebook can run on another machine without editing this
# cell; the fallback is the path the notebook was originally run with.
import os

PROVIDER_URI = os.environ.get(
    "QLIB_PROVIDER_URI",
    "/data/home/dinghj/zr-alphagen/zr-alpha-training-base/.qlib/qlib_data/cn_data",
)

# Initialise qlib against that dataset with region="cn".
qlib.init(provider_uri=PROVIDER_URI, region="cn")

[98153:MainThread](2025-05-23 00:44:23,826) INFO - qlib.Initialization - [config.py:420] - default_conf: client.


[98153:MainThread](2025-05-23 00:44:25,180) INFO - qlib.Initialization - [__init__.py:74] - qlib successfully initialized based on client settings.
[98153:MainThread](2025-05-23 00:44:25,181) INFO - qlib.Initialization - [__init__.py:76] - data_path={'__DEFAULT_FREQ': PosixPath('/data/home/dinghj/zr-alphagen/zr-alpha-training-base/.qlib/qlib_data/cn_data')}


In [4]:
# Expression for the factor under study: -1 times the 5-period Std of
# close / Ref(close, 1), written in qlib's expression syntax.
FACTOR_EXPR = "-1 * Std(Div($close, Ref($close, 1)), 5)"
# Return label: by default the next-period return as of the current day,
# i.e. Ref(close, -1) / close - 1.
LABEL_EXPR = "Ref($close, -1)/$close - 1"

# Date range and bar frequency shared by both queries.
query_window = dict(start_time="2021-01-01", end_time="2024-12-30", freq="day")

factor_data = D.features(
    instruments=D.instruments(market='all'),
    fields=[FACTOR_EXPR],
    **query_window,
)
label_data = D.features(
    instruments=D.instruments(market='all'),
    fields=[LABEL_EXPR],
    **query_window,
)

In [5]:
# Display factor_data as this cell's output.
factor_data

Unnamed: 0_level_0,Unnamed: 1_level_0,"-1 * Std(Div($close, Ref($close, 1)), 5)"
instrument,datetime,Unnamed: 2_level_1
BJ430017,2023-05-31,
BJ430017,2023-06-01,
BJ430017,2023-06-02,-0.051149
BJ430017,2023-06-05,-0.036812
BJ430017,2023-06-06,-0.031051
...,...,...
SZ399300,2024-12-24,-0.006361
SZ399300,2024-12-25,-0.006345
SZ399300,2024-12-26,-0.006371
SZ399300,2024-12-27,-0.005719


In [6]:
# Display label_data as this cell's output.
label_data

Unnamed: 0_level_0,Unnamed: 1_level_0,"Ref($close, -1)/$close - 1"
instrument,datetime,Unnamed: 2_level_1
BJ430017,2023-05-31,-0.060606
BJ430017,2023-06-01,0.011730
BJ430017,2023-06-02,-0.012560
BJ430017,2023-06-05,-0.004892
BJ430017,2023-06-06,-0.007866
...,...,...
SZ399300,2024-12-24,0.000487
SZ399300,2024-12-25,0.000464
SZ399300,2024-12-26,-0.001618
SZ399300,2024-12-27,0.004526


In [7]:
# Keep only the rows present in both frames, then drop every row where the
# factor or the label is missing.
aligned = factor_data.join(label_data, how="inner")
df = aligned.dropna()
# Replace the long expression strings with short column names.
df.columns = ["factor", "label"]
df

Unnamed: 0_level_0,Unnamed: 1_level_0,factor,label
instrument,datetime,Unnamed: 2_level_1,Unnamed: 3_level_1
BJ430017,2023-06-02,-0.051149,-0.012560
BJ430017,2023-06-05,-0.036812,-0.004892
BJ430017,2023-06-06,-0.031051,-0.007866
BJ430017,2023-06-07,-0.027172,-0.001982
BJ430017,2023-06-08,-0.009173,-0.010924
...,...,...,...
SZ399300,2024-12-24,-0.006361,0.000487
SZ399300,2024-12-25,-0.006345,0.000464
SZ399300,2024-12-26,-0.006371,-0.001618
SZ399300,2024-12-27,-0.005719,0.004526


In [8]:
from typing import List, Tuple, Optional

# Fraction of each day's cross-section that goes into each leg: names at or
# above the (1 - QUANTILE) factor quantile are held long, names at or below
# the QUANTILE factor quantile are held short.
QUANTILE = 0.2

# One (date, pnl, turnover) tuple per trading day.
records = []
# Instrument sets held on the previous date; None until the first date is done.
prev_longs: Optional[set] = None
prev_shorts: Optional[set] = None

for date, group in df.groupby(level="datetime"):
    factor = group["factor"]
    label = group["label"]

    # Long / short selection by factor quantile within this date.
    long_cut = factor.quantile(1 - QUANTILE)
    short_cut = factor.quantile(QUANTILE)
    is_long = factor >= long_cut
    is_short = factor <= short_cut

    longs = set(factor[is_long].index.get_level_values("instrument"))
    shorts = set(factor[is_short].index.get_level_values("instrument"))

    # Equal-weighted mean label of each leg; the short leg earns the negated
    # label. The day's pnl is the average of the two legs.
    long_ret = label[is_long].mean()
    short_ret = -label[is_short].mean()
    daily_pnl = (long_ret + short_ret) / 2

    if prev_longs is None or prev_shorts is None:
        # No previous holdings on the first date, so turnover is undefined.
        turnover = float("nan")
    else:
        # Names entering or leaving each leg form the symmetric difference
        # between yesterday's and today's holdings (entries + exits).
        long_changes = longs ^ prev_longs
        # Short-leg entries and exits.
        short_changes = shorts ^ prev_shorts
        trades = len(long_changes) + len(short_changes)
        denom = len(prev_longs) + len(prev_shorts)
        turnover = trades / denom if denom > 0 else float("nan")

    records.append((date, daily_pnl, turnover))
    prev_longs, prev_shorts = longs, shorts

pnl_df = pd.DataFrame(
    records, columns=["datetime", "pnl", "turnover"]
).set_index("datetime")
# Date-sorted daily results; later cells read the "pnl" and "turnover" columns.
pnl = pnl_df.sort_index()

In [20]:
import numpy as np
# First compute the daily IC / RankIC series.
# Rebuild the aligned factor/label frame so this cell does not depend on the
# earlier cell's `df`.
aligned = factor_data.join(label_data, how="inner")
df = aligned.dropna()
df.columns = ["factor", "label"]


def _daily_ic(day):
    """Correlation between factor and label values within one date."""
    return day["factor"].corr(day["label"])


def _daily_rank_ic(day):
    """Correlation between the ranks of factor and label within one date."""
    factor_rank = day["factor"].rank()
    label_rank = day["label"].rank()
    return factor_rank.corr(label_rank)


ic_series = df.groupby(level="datetime").apply(_daily_ic)
rank_ic_series = df.groupby(level="datetime").apply(_daily_rank_ic)

# Accumulates one summary dict per period (each year, then the full sample).
results = []


def agg_period(pnl_s, to_s, ic_s, ric_s, name):
    """Summarise the performance of one period as a dict of display values.

    Parameters
    ----------
    pnl_s : pandas.Series
        Per-period (daily) strategy returns.
    to_s : pandas.Series
        Per-period turnover values; NaN entries are skipped by ``mean``.
    ic_s, ric_s : pandas.Series
        Per-period IC and RankIC values.
    name : str
        Label stored under the ``"period"`` key.

    Returns
    -------
    dict
        Keys ``period``, ``AnnRet`` (percent string), ``AnnTurn``, ``Sharpe``,
        ``IC``, ``RankIC``, ``MaxDD`` (percent string) and ``Fitness``.

    Notes
    -----
    * ``AnnTurn`` is the plain mean of ``to_s`` rounded to two decimals; it is
      not scaled by the number of trading days.
    * ``Fitness`` is ``Sharpe * sqrt(|AnnRet / mean turnover|)`` computed from
      the unrounded values. Rounding first would turn any mean turnover below
      0.005 into 0.0 and produce an infinite fitness. It is NaN when the mean
      turnover is zero or missing.
    """
    n = len(pnl_s)

    # Annualised return by compounding (open question: or use mean * 252?).
    cum_ret = pnl_s.add(1).prod() - 1
    ann_ret = (1 + cum_ret) ** (252 / n) - 1 if n > 0 else np.nan

    # Mean turnover, kept unrounded for the fitness calculation below.
    mean_turn = to_s.mean()

    # Sharpe ratio, annualised with sqrt(252); undefined when the sample
    # standard deviation is zero or fewer than two observations exist.
    mu, sigma = pnl_s.mean(), pnl_s.std(ddof=1)
    sharpe = mu / sigma * np.sqrt(252) if sigma and n > 1 else np.nan

    # Mean IC / RankIC.
    ic_m = round(ic_s.mean(), 3)
    ric_m = round(ric_s.mean(), 3)

    # Maximum drawdown of the compounded equity curve relative to its peak.
    cum = pnl_s.add(1).cumprod()
    running_peak = cum.cummax()
    dd = (cum - running_peak) / running_peak
    max_dd = "{:.2%}".format(dd.min())

    # Fitness from unrounded inputs; NaN compares False, so a missing
    # turnover also takes the NaN branch.
    if mean_turn > 0:
        fitness = round(sharpe * abs(ann_ret / mean_turn) ** 0.5, 2)
    else:
        fitness = np.nan

    return {
        "period": name,
        "AnnRet": "{:.2%}".format(ann_ret),
        "AnnTurn": round(mean_turn, 2),
        "Sharpe": round(sharpe, 2),
        "IC": ic_m,
        "RankIC": ric_m,
        "MaxDD": max_dd,
        "Fitness": fitness,
    }

# Group by calendar year.
pnl_s = pnl["pnl"]
to_s = pnl["turnover"]


def _slice_year(series, year):
    """Return the rows of ``series`` whose own index falls in ``year``."""
    return series[series.index.year == year]


# Each series is sliced by its own index rather than by a positional mask
# built from pnl_s, so the IC series need not match pnl_s in length or order.
for year in sorted(pnl_s.index.year.unique()):
    results.append(
        agg_period(
            _slice_year(pnl_s, year),
            _slice_year(to_s, year),
            _slice_year(ic_series, year),
            _slice_year(rank_ic_series, year),
            str(year),
        )
    )
# Full sample.
results.append(
    agg_period(pnl_s, to_s, ic_series, rank_ic_series, "total")
)

# One row per period, indexed by the period label.
perf_df = pd.DataFrame(results).set_index("period")

In [21]:
# Print the plain-text rendering of the per-period performance table.
print(perf_df)

        AnnRet  AnnTurn  Sharpe     IC  RankIC    MaxDD  Fitness
period                                                          
2021     5.67%     0.47    0.77  0.009   0.050   -8.15%     0.27
2022    12.86%     0.51    1.87  0.017   0.056   -5.23%     0.94
2023     8.14%     0.47    1.01  0.008   0.059   -6.71%     0.42
2024     2.18%     0.52    0.23  0.012   0.054  -16.07%     0.05
total    7.15%     0.49    0.78  0.011   0.055  -16.07%     0.30
