# Stock Factor Analysis Framework

## 1. Import Libraries

In [7]:
import dai
import time 
import random
import pandas as pd
import numpy as np
import bigcharts
from bigcharts import opts
from matplotlib.colors import LinearSegmentedColormap, to_hex

## 2. Factor Settings

In [8]:
# Single configuration object consumed by FactorAnalysis and the backtest cell.
factor_param_dict = {
    # SQL that must yield the columns: date, instrument, factor.
    "sql_factor": """
        SELECT
            date,
            instrument,
            -1 * (c_pct_rank(m_corr(close, volume, 5)) * 0.3 + c_pct_rank(m_corr(close, volume, 20)) * 0.3 + c_pct_rank(turn) * 0.4) AS factor,
        FROM cn_stock_prefactors_community
        ORDER BY date, instrument
    """,
    # Start / end dates passed as the query's date filter.
    "sd": "2020-01-01",
    "ed": "2026-02-13",

    # --- Factor analysis settings ---
    "group_num": 10,
    # Stock universe. Options: 全市场, 沪深300, 中证100, 中证500, 中证1000, 上证指数, 深证100
    "instruments_pool": "全市场",
    # Benchmark. Options: 沪深300, 中证100, 中证500, 中证1000, 上证指数, 深证100
    "benchmark": "沪深300",
    # True selects the clipped / normalized / neutralized factor CTE.
    "is_data_process": False,

    # --- Factor strategy settings ---
    "stock_num": 20,
    "capital_base": 1000000,
    # Weighting. Options: 等权重, 市值权重, 因子值权重, 因子排名权重
    "weighting": "等权重",
    # Rebalance frequency. Options: 每日, 每周, 每月, 每季度, 每年
    "rebalance": "每月",
}

## 3. Factor Analysis

### 3.1 Factor Analysis Framework

In [9]:
class FactorAnalysis:
    """Run a single-factor analysis and build the matching strategy table.

    Constructing an instance performs all the work, in this order:

    1. ``factor_process`` wraps the user's factor SQL in filtering, optional
       preprocessing and grouping CTEs; the result is queried through ``dai``
       with ``sd`` / ``ed`` as the date filter and stored in ``df_factor``.
    2. The IC summary table, yearly IC table, grouped-return table, cumulative
       return chart and IC chart are rendered through ``bigcharts``.
    3. ``factor_strategy`` builds the target-position SQL, which is queried
       into ``df_strategy``.

    Attributes set by ``__init__``: ``sd``, ``ed``, ``sql_factor_process``,
    ``df_factor``, ``table_summary``, ``table_yearly``, ``table_return``,
    ``c_ret``, ``c_ic``, ``sql_factor_strategy``, ``df_strategy``.
    """

    # Annualisation basis and annual risk-free rate used by the return table.
    TRADING_DAYS = 252
    RISK_FREE_RATE = 0.015

    def __init__(self, factor_param_dict):
        """Query the factor data, render all reports and query the strategy.

        Args:
            factor_param_dict: Configuration dictionary. Keys read here or in
                the helpers: ``sql_factor``, ``sd``, ``ed``, ``group_num``,
                ``instruments_pool``, ``benchmark``, ``is_data_process``,
                ``stock_num``, ``weighting``, ``rebalance``.

        Raises:
            Exception: If the factor query or the strategy query returns no
                rows.
            KeyError: If a required key is missing, or the pool / benchmark
                name is not supported.
        """
        self.sd = factor_param_dict["sd"]
        self.ed = factor_param_dict["ed"]
        self.sql_factor_process = self.factor_process(factor_param_dict)
        self.df_factor = dai.query(
            self.sql_factor_process, filters={"date": [self.sd, self.ed]}
        ).df()

        if len(self.df_factor) == 0:
            raise Exception("因子分析数据为空，请检查因子SQL或起始与终止时间是否正确！")

        self.table_summary = self.get_ic_summary(self.df_factor)
        self.table_yearly = self.get_ic_yearly(self.df_factor)
        self.table_return = self.get_ret(self.df_factor)

        self.c_ret = self.bigcharts_ret(self.df_factor)
        self.c_ic = self.bigcharts_ic(self.df_factor)

        self.sql_factor_strategy = self.factor_strategy(factor_param_dict)
        self.df_strategy = dai.query(
            self.sql_factor_strategy, filters={"date": [self.sd, self.ed]}
        ).df()

        if len(self.df_strategy) == 0:
            raise Exception("因子策略数据为空，请检查因子SQL或起始与终止时间是否正确！")

    @staticmethod
    def _daily_rank_ic(df):
        """Return the per-date correlation between ranked factor and ranked ret.

        Dates whose correlation is NaN are dropped. Only the ``factor`` and
        ``ret`` columns are handed to the per-group function, so the result
        does not depend on how pandas treats the grouping column in ``apply``.
        """
        return (
            df.groupby("date")[["factor", "ret"]]
            .apply(lambda x: x["factor"].rank().corr(x["ret"].rank()))
            .dropna()
        )

    @staticmethod
    def _render_table(result, title):
        """Render ``result`` as a titled bigcharts table and return the render."""
        chart = bigcharts.Chart(
            data=result,
            type_="table",
            chart_options=dict(
                title_opts=opts.ComponentTitleOpts(title=title)
            ),
            y=list(result.columns),
        )
        return chart.render(display=True)

    def factor_process(self, factor_param_dict):
        """Build the SQL that turns the raw factor into analysis-ready rows.

        The generated statement chains these CTEs:

        * ``data_alpha``: the user's ``sql_factor`` verbatim.
        * ``data_alpha_origin``: drops rows with NULLs or +/-Infinity factors.
        * ``data_alpha_filter``: keeps rows with ``amount > 0``,
          ``is_risk_warning = 0``, names not containing ``ST`` or ``退``,
          instruments ending in ``SH`` / ``SZ``, and the pool predicate.
        * ``data_alpha_process``: clips the factor to mean +/- 3 std, then
          applies ``c_normalize`` and ``c_neutralize`` (on ``sw2021_level1``
          and ``LOG(total_market_cap)``).
        * ``data_combine``: adds ``ret``, ``group`` and ``ret_bm``; it reads
          from ``data_alpha_process`` when ``is_data_process`` is truthy and
          from ``data_alpha_filter`` otherwise.

        Args:
            factor_param_dict: Configuration dictionary (see ``__init__``).

        Returns:
            The SQL text.

        Raises:
            KeyError: If ``instruments_pool`` or ``benchmark`` is not one of
                the supported names.
        """
        sql_factor = factor_param_dict["sql_factor"]

        if factor_param_dict["is_data_process"]:
            sql_is_data_process = "data_alpha_process"
        else:
            sql_is_data_process = "data_alpha_filter"

        # Universe name -> WHERE predicate on cn_stock_prefactors_community.
        map_pool = {
            "全市场": "1=1",
            "沪深300": "is_hs300=1",
            "中证100": "is_zz100=1",
            "中证500": "is_zz500=1",
            "中证1000": "is_zz1000=1",
            "上证指数": "is_szzs=1",
            "深证100": "is_sz100=1",
        }

        # Benchmark name -> benchmark return column.
        map_benchmark = {
            "沪深300": "return_000300SH",
            "中证100": "return_000903SH",
            "中证500": "return_000905SH",
            "中证1000": "return_000852SH",
            "上证指数": "return_000001SH",
            "深证100": "return_399330SZ",
        }

        # Fail with a message that lists the valid options instead of a bare
        # KeyError; the exception type is unchanged.
        pool_name = factor_param_dict["instruments_pool"]
        if pool_name not in map_pool:
            raise KeyError(
                f"unsupported instruments_pool {pool_name!r}; "
                f"expected one of {list(map_pool)}"
            )
        benchmark_name = factor_param_dict["benchmark"]
        if benchmark_name not in map_benchmark:
            raise KeyError(
                f"unsupported benchmark {benchmark_name!r}; "
                f"expected one of {list(map_benchmark)}"
            )

        pool_filter = map_pool[pool_name]
        benchmark_col = map_benchmark[benchmark_name]
        group_num = factor_param_dict["group_num"]

        # NOTE(review): data_alpha_process projects two columns named
        # `factor` (the raw one and the neutralized alias) - confirm the
        # engine resolves this as intended when is_data_process is True.
        # NOTE(review): `ret` is m_lead(open, 2) / m_lead(open, 1) - 1,
        # presumably the open-to-open return starting the next bar - confirm.
        sql_factor_processed = f"""
        WITH
        data_alpha AS (
            {sql_factor}
        ),
        data_alpha_origin AS (
            SELECT 
                date,
                instrument,
                factor,
            FROM data_alpha
            QUALIFY COLUMNS(*) IS NOT NULL AND factor != 'Infinity' AND factor != '-Infinity'
        ),
        data_alpha_filter AS (
            SELECT 
                date,
                instrument,
                factor,
            FROM data_alpha_origin JOIN cn_stock_prefactors_community USING (date, instrument)
            WHERE amount > 0
            AND is_risk_warning = 0
            AND name NOT LIKE '%ST%'
            AND name NOT LIKE '%退%'
            AND (instrument LIKE '%SH' OR instrument LIKE '%SZ')
            AND {pool_filter}
            QUALIFY COLUMNS(*) IS NOT NULL
        ),
        data_alpha_process AS (
            SELECT 
                date,
                instrument,
                factor,
                clip(factor, c_avg(factor) - 3 * c_std(factor), c_avg(factor) + 3 * c_std(factor)) AS clipped_factor,
                c_normalize(clipped_factor) AS normalized_factor,
                c_neutralize(normalized_factor, sw2021_level1, LOG(total_market_cap)) AS neutralized_factor,
                neutralized_factor AS factor,
            FROM data_alpha_filter JOIN cn_stock_prefactors_community USING (date, instrument)
            QUALIFY COLUMNS(*) IS NOT NULL
            ORDER BY date, instrument
        ),
        data_combine AS (
            SELECT 
                date, 
                instrument, 
                factor,
                m_lead(open, 2) / m_lead(open, 1) - 1 AS ret,
                c_cbins(-1*factor, {group_num}) + 1 AS group,
                {benchmark_col} AS ret_bm,
            FROM {sql_is_data_process} JOIN cn_stock_prefactors_community USING (date, instrument)
            QUALIFY COLUMNS(*) IS NOT NULL
            ORDER BY date, factor DESC
        )
        SELECT *
        FROM data_combine
        QUALIFY COLUMNS(*) IS NOT NULL
        ORDER BY date, factor DESC
        """

        return sql_factor_processed

    def factor_strategy(self, factor_param_dict):
        """Build the SQL that produces target positions on rebalance days.

        Reads ``self.sql_factor_process`` (set by ``__init__``), keeps rows
        whose ``c_rank(factor)`` is at most ``stock_num``, attaches a
        ``position`` weight and keeps only the rows matching the rebalance
        predicate.

        Args:
            factor_param_dict: Configuration dictionary; reads ``weighting``,
                ``rebalance`` and ``stock_num``. Unrecognised ``weighting``
                values fall back to equal weight and unrecognised
                ``rebalance`` values to every trading day. Both ``每季`` and
                ``每季度`` select quarter-end rebalancing.

        Returns:
            The SQL text.
        """
        weight_sql = {
            "市值权重": "float_market_cap / c_sum(float_market_cap)",
            "因子值权重": "score / c_sum(score)",
            "因子排名权重": "score_rank / c_sum(score_rank)",
        }
        sql_weight = weight_sql.get(factor_param_dict["weighting"], "1 / c_sum(1)")

        # "每季度" is the spelling advertised in the settings cell; "每季" is
        # kept for backward compatibility. Previously only "每季" matched, so
        # a quarterly setting silently became daily rebalancing.
        rebalance_sql = {
            "每周": "is_week_end_trade = 1",
            "每月": "is_month_end_trade = 1",
            "每季": "is_quarter_end_trade = 1",
            "每季度": "is_quarter_end_trade = 1",
            "每年": "is_year_end_trade = 1",
        }
        sql_rebalance = rebalance_sql.get(factor_param_dict["rebalance"], "1=1")

        stock_num = factor_param_dict["stock_num"]

        sql_trade = f"""
        WITH
        data_strategy AS (
            {self.sql_factor_process}
        ),
        data_filter AS (
            SELECT
                date,
                instrument,
                factor AS score,
                c_rank(factor) AS score_rank,
            FROM data_strategy
            QUALIFY score_rank <= {stock_num}
        ),
        data_date AS (
            SELECT
                date,
                instrument,
                score, 
                score_rank, 
                {sql_weight} AS position, 
            FROM data_filter JOIN cn_stock_valuation USING (date, instrument) JOIN mldt_cn_stock_calendar_daily USING (date)
            WHERE {sql_rebalance}
        )
        SELECT *
        FROM data_date
        ORDER BY date, score_rank
        """

        return sql_trade

    def get_ic_summary(self, df):
        """Render the overall IC table (mean IC, IR, share of |IC| > 0.02).

        Args:
            df: Frame with ``date``, ``factor`` and ``ret`` columns.

        Returns:
            Whatever ``bigcharts.Chart.render(display=True)`` returns. When no
            daily IC can be computed the single row holds NaN.
        """
        df = df.copy()
        df["date"] = pd.to_datetime(df["date"])

        ic_ts = self._daily_rank_ic(df)

        if ic_ts.empty:
            row = {
                "IC": np.nan,
                "IR": np.nan,
                "IC绝对值大于0.02比例": np.nan,
            }
        else:
            ic_mean = ic_ts.mean()
            ic_std = ic_ts.std()
            ir = ic_mean / ic_std if ic_std != 0 else np.nan
            ratio = (ic_ts.abs() > 0.02).mean()
            row = {
                "IC": round(ic_mean, 4),
                "IR": round(ir, 4),
                "IC绝对值大于0.02比例": round(ratio, 4),
            }

        return self._render_table(pd.DataFrame([row]), "IC整体统计指标")

    def get_ic_yearly(self, df):
        """Render the per-calendar-year IC table.

        Args:
            df: Frame with ``date``, ``factor`` and ``ret`` columns.

        Returns:
            The rendered table, or - when no daily IC can be computed - an
            empty ``DataFrame`` with the same column names as the table.
        """
        df = df.copy()
        df["date"] = pd.to_datetime(df["date"])

        ic_ts = self._daily_rank_ic(df).to_frame("IC")

        if ic_ts.empty:
            # Column names match the populated table (the ratio column was
            # previously spelled differently in this fallback).
            return pd.DataFrame(columns=["年份", "IC", "IR", "IC绝对值大于0.02比例"])

        ic_ts["year"] = ic_ts.index.year

        rows = []
        for year, yearly in ic_ts.groupby("year"):
            ic_mean = yearly["IC"].mean()
            ic_std = yearly["IC"].std()
            ir = ic_mean / ic_std if ic_std != 0 else np.nan
            ratio = (yearly["IC"].abs() > 0.02).mean()

            rows.append({
                "年份": str(year),
                "IC": round(ic_mean, 4),
                "IR": round(ir, 4),
                "IC绝对值大于0.02比例": round(ratio, 4),
            })

        result = pd.DataFrame(rows).reset_index(drop=True)

        return self._render_table(result, "IC年度统计指标")

    def get_ret(self, df):
        """Render the per-group performance table plus a long-short row.

        Group returns are the per-date mean of ``ret`` within each group. The
        long-short series is the lowest-numbered group minus the
        highest-numbered group. Totals and annualised figures are arithmetic
        (sums / means of daily returns); max drawdown uses a compounded NAV.

        Args:
            df: Frame with ``date``, ``group``, ``factor``, ``ret`` and
                ``ret_bm`` columns.

        Returns:
            Whatever ``bigcharts.Chart.render(display=True)`` returns.
        """
        df = df.copy()
        df["date"] = pd.to_datetime(df["date"])
        df["group"] = df["group"].astype(str)

        trading_days = self.TRADING_DAYS
        rf = self.RISK_FREE_RATE

        group_ret = (
            df.groupby(["date", "group"])["ret"]
            .mean()
            .unstack()
            .sort_index()
        )
        bm = df.groupby("date")["ret_bm"].first().sort_index()
        group_ret["bm"] = bm.reindex(group_ret.index)

        # Group labels are numeric strings; sort them numerically.
        group_cols = sorted([c for c in group_ret.columns if c != "bm"], key=lambda x: int(x))
        bottom, top = group_cols[0], group_cols[-1]
        ls_name = f"long_short({bottom}-{top})"
        group_ret[ls_name] = group_ret[bottom] - group_ret[top]

        def perf_stats(ret_series):
            """Return the ten performance figures for one daily return series."""
            ret_series = ret_series.dropna()
            if ret_series.empty:
                return [np.nan] * 10

            # Align on dates where both the series and the benchmark exist.
            bm_series = group_ret.loc[ret_series.index, "bm"].dropna()
            idx = ret_series.index.intersection(bm_series.index)
            r = ret_series.loc[idx]
            b = bm_series.loc[idx]

            total = r.sum()
            annual = r.mean() * trading_days
            ex_total = (r - b).sum()
            ex_annual = (r - b).mean() * trading_days

            std = r.std()
            vol = std * np.sqrt(trading_days) if std != 0 else np.nan
            sharpe = (((r - rf / trading_days).mean() / std) * np.sqrt(trading_days)) if std != 0 else np.nan
            # NOTE(review): reported as the information ratio but computed as
            # daily mean / std of the raw return, without the benchmark and
            # without annualisation - confirm this is intended.
            info = (r.mean() / std) if std != 0 else np.nan

            nav = (1 + r).cumprod()
            mdd = (nav / nav.cummax() - 1).min()

            win = (r > 0).mean()
            ret_252 = r.tail(trading_days).sum()

            return [total, annual, ex_total, ex_annual, sharpe, vol, info, mdd, win, ret_252]

        def ic_stats(sub_df):
            """Return (mean IC, IR, mean IC of the last 252 dates) for a subset."""
            ic_ts = self._daily_rank_ic(sub_df)

            if ic_ts.empty:
                return np.nan, np.nan, np.nan

            ic = ic_ts.mean()
            std = ic_ts.std()
            ir = ic / std if std != 0 else np.nan
            ic_252 = ic_ts.tail(trading_days).mean()
            return ic, ir, ic_252

        rows = []

        for g in group_cols:
            ic, ir, ic_252 = ic_stats(df[df["group"] == g])
            rows.append([g, ic, ir] + perf_stats(group_ret[g]) + [ic_252])

        # The long-short row's IC is measured on the two extreme groups pooled.
        ic, ir, ic_252 = ic_stats(df[df["group"].isin([bottom, top])])
        rows.append([ls_name, ic, ir] + perf_stats(group_ret[ls_name]) + [ic_252])

        result = pd.DataFrame(
            rows,
            columns=[
                "组合",
                "IC",
                "IR",
                "总收益",
                "年化收益",
                "超额总收益",
                "超额年化收益",
                "夏普比率",
                "年化波动率",
                "信息比率",
                "最大回撤",
                "胜率",
                "近252日收益",
                "近252日IC平均值",
            ],
        ).round(4)

        return self._render_table(result, "分组收益指标")

    def bigcharts_ret(self, df):
        """Render cumulative (summed) return lines per group, long-short and benchmark.

        Args:
            df: Frame with ``date``, ``group``, ``ret`` and ``ret_bm`` columns.

        Returns:
            Whatever ``bigcharts.Chart.render(display=True)`` returns.
        """
        data = df.copy()
        data["date"] = pd.to_datetime(data["date"])
        data["group"] = data["group"].astype(str)

        g = (
            data.groupby(["date", "group"], as_index=False)["ret"]
            .mean()
            .rename(columns={"ret": "g_ret"})
        )
        wide = g.pivot(index="date", columns="group", values="g_ret").sort_index()

        bm = data.groupby("date")["ret_bm"].first().sort_index()
        wide["bm"] = bm.reindex(wide.index)

        group_cols = sorted([c for c in wide.columns if c != "bm"], key=lambda x: int(x))
        bottom, top = group_cols[0], group_cols[-1]
        wide["ls"] = wide[bottom] - wide[top]

        cum = wide.cumsum()
        cum = cum.round(4)
        cum["date_str"] = cum.index.strftime("%Y-%m-%d")
        cum = cum.reset_index(drop=True)

        # One colour per group along a red -> yellow -> green gradient.
        n = len(group_cols)
        cmap = LinearSegmentedColormap.from_list("r_y_g", ["#d73027", "#fee08b", "#1a9850"])
        # max(..., 1) avoids a ZeroDivisionError when there is a single group.
        span = max(n - 1, 1)
        gradient_colors = [to_hex(cmap(i / span)) for i in range(n)]

        color_map = dict(zip(group_cols, gradient_colors))
        color_map["ls"] = "#7b3294"
        color_map["bm"] = "#2166ac"

        y_cols = group_cols + ["ls", "bm"]

        series_options = {
            col: {"itemstyle_opts": opts.ItemStyleOpts(color=color_map[col])}
            for col in y_cols
        }

        c = bigcharts.Chart(
            data=cum,
            type_="line",
            x="date_str",
            y=y_cols,
            chart_options=dict(
                legend_opts=opts.LegendOpts(pos_top="bottom"),
                title_opts=opts.TitleOpts(title="分组累计收益曲线", pos_top="top", pos_left="left"),
                xaxis_opts=opts.AxisOpts(split_number=8),
            ),
            series_options=series_options,
        )

        return c.render(display=True)

    def bigcharts_ic(self, df):
        """Render the IC chart: daily bars, 22-day mean and cumulative IC.

        Args:
            df: Frame with ``date``, ``factor`` and ``ret`` columns.

        Returns:
            Whatever ``bigcharts.Chart.render(display=True)`` returns.
        """
        data = df.copy()
        data["date"] = pd.to_datetime(data["date"])

        ic = self._daily_rank_ic(data).to_frame("IC每日值")
        ic["IC22日平均值"] = ic["IC每日值"].rolling(22).mean()
        ic["IC累计值"] = ic["IC每日值"].cumsum()

        ic = ic.reset_index()
        ic["日期"] = ic["date"].dt.strftime("%Y-%m-%d")

        c_bar = bigcharts.Chart(
            data=ic,
            type_="bar",
            x="日期",
            y=["IC每日值"],
            series_options={
                "IC每日值": {
                    "itemstyle_opts": opts.ItemStyleOpts(color="#6baed6"),
                    "z": 1,
                }
            },
        )

        c_area = bigcharts.Chart(
            data=ic,
            type_="line",
            x="日期",
            y=["IC22日平均值"],
            series_options={
                "IC22日平均值": {
                    "itemstyle_opts": opts.ItemStyleOpts(color="#e41a1c"),  # red
                    "linestyle_opts": opts.LineStyleOpts(width=2),
                    "areastyle_opts": opts.AreaStyleOpts(opacity=0.25),
                    "symbol": "none",
                    "z": 3,
                }
            },
        )

        c_cum = bigcharts.Chart(
            data=ic,
            type_="line",
            x="日期",
            y=["IC累计值"],
            series_options={
                "IC累计值": {
                    "itemstyle_opts": opts.ItemStyleOpts(color="#ff7f0e"),  # orange
                    "linestyle_opts": opts.LineStyleOpts(width=2),
                    # Cumulative IC is drawn against the second y-axis.
                    "yaxis_index": 1,
                    "symbol": "circle",
                    "z": 4,
                }
            },
        )

        c = bigcharts.Chart(
            data=[c_bar, c_area, c_cum],
            type_="overlap",
            chart_options=dict(
                title_opts=opts.TitleOpts(
                    title="IC分析图",
                    pos_left="left",
                ),
                legend_opts=opts.LegendOpts(
                    pos_bottom="0%",
                    pos_left="center",
                ),
                xaxis_opts=opts.AxisOpts(
                    split_number=8,
                    axislabel_opts=opts.LabelOpts(rotate=0),
                ),
                yaxis_opts=opts.AxisOpts(
                    name="IC每日值",
                    name_gap=25,
                ),
                extend_yaxis=[
                    opts.AxisOpts(
                        name="IC累计值",
                        name_gap=25,
                    )
                ],
            ),
        )

        return c.render(display=True)

### 3.2 Factor Analysis Instance

In [10]:
# Instantiating runs the factor query, renders every table and chart, and
# queries the strategy table (all done inside FactorAnalysis.__init__).
factor_analysis = FactorAnalysis(factor_param_dict)

IC,IR,IC绝对值大于0.02比例
0.0516,0.381,0.8975


年份,IC,IR,IC绝对值大于0.02比例
2020,0.0447,0.3272,0.8884
2021,0.0404,0.3347,0.8807
2022,0.0566,0.4873,0.8843
2023,0.0501,0.3851,0.9008
2024,0.057,0.3899,0.905
2025,0.0594,0.3829,0.9177
2026,0.0582,0.3509,0.9643


组合,IC,IR,总收益,年化收益,超额总收益,超额年化收益,夏普比率,年化波动率,信息比率,最大回撤,胜率,近252日收益,近252日IC平均值
1,0.0031,0.0348,1.3946,0.2401,1.0866,0.187,1.0508,0.2142,0.0706,-0.2999,0.5471,0.3943,-0.0104
2,0.003,0.0566,1.4984,0.2579,1.1904,0.2049,1.0482,0.2318,0.0701,-0.3156,0.5492,0.47,0.0003
3,0.0015,0.0289,1.6051,0.2763,1.2971,0.2233,1.0883,0.2401,0.0725,-0.3204,0.556,0.536,0.0018
4,0.0015,0.0304,1.5605,0.2686,1.2525,0.2156,1.0253,0.2474,0.0684,-0.3364,0.5615,0.576,0.005
5,0.0049,0.0979,1.5158,0.2609,1.2078,0.2079,0.9694,0.2537,0.0648,-0.3337,0.5622,0.5323,0.0055
6,0.0038,0.077,1.4645,0.2521,1.1565,0.1991,0.9114,0.2601,0.061,-0.3396,0.5601,0.5268,0.0083
7,0.0068,0.1341,1.312,0.2258,1.004,0.1728,0.7786,0.2708,0.0525,-0.3372,0.5499,0.4572,0.0044
8,0.0089,0.1746,1.0256,0.1765,0.7176,0.1235,0.5739,0.2815,0.0395,-0.3604,0.5458,0.4055,0.0157
9,0.014,0.2649,0.6658,0.1146,0.3578,0.0616,0.3344,0.2979,0.0242,-0.4401,0.5328,0.2835,0.0176
10,0.041,0.4929,-0.6223,-0.1071,-0.9303,-0.1601,-0.3828,0.319,-0.0212,-0.7459,0.5007,0.1197,0.0433


## 4. Factor-Based Strategy Backtest

In [11]:
# Strategy table queried by FactorAnalysis.__init__; passed to the backtest
# below as its `data` argument.
df_strategy = factor_analysis.df_strategy

In [12]:
from bigmodule import M

def BigTrader_Initialize(context):
    """Backtest `initialize` callback: install the per-order commission.

    Builds a ``PerOrder`` commission with buy_cost=0.0003, sell_cost=0.0013
    and min_cost=5, and registers it on ``context``.
    """
    from bigtrader.finance.commission import PerOrder

    commission = PerOrder(buy_cost=0.0003, sell_cost=0.0013, min_cost=5)
    context.set_commission(commission)

def BigTrader_Before_Trading(context, data):
    """Backtest `before_trading_start` callback; intentionally a no-op."""
    return None

def BigTrader_Handle_Tick(context, tick):
    """Backtest `handle_tick` callback; intentionally a no-op."""
    return None

def BigTrader_Handle_Data(context, data):
    """Backtest `handle_data` callback: move the account to today's targets.

    Rows of ``context.data`` whose ``date`` equals the current bar date
    (formatted ``YYYY-MM-DD``) are today's targets. If there are none, nothing
    is traded. Otherwise every held instrument missing from the targets is
    ordered to 0, then each target is ordered to its ``position`` weight
    (a null weight is treated as 0.0).
    """
    today = data.current_dt.strftime("%Y-%m-%d")
    todays_targets = context.data[context.data["date"] == today]

    if todays_targets.empty:
        return

    wanted = set(todays_targets["instrument"])

    # Exit holdings that are no longer targeted before placing target orders.
    for held in list(context.get_account_positions().keys()):
        if held not in wanted:
            context.order_target_percent(held, 0)

    for symbol, weight in zip(todays_targets["instrument"], todays_targets["position"]):
        target = 0.0 if pd.isnull(weight) else float(weight)
        context.order_target_percent(symbol, target)

def BigTrader_Handle_Trade(context, trade):
    """Backtest `handle_trade` callback; intentionally a no-op."""
    return None

def BigTrader_Handle_Order(context, order):
    """Backtest `handle_order` callback; intentionally a no-op."""
    return None

def BigTrader_After_Trading(context, data):
    """Backtest `after_trading` callback; intentionally a no-op."""
    return None

# Run the backtest over the strategy table using the callbacks defined above.
BigTrader = M.bigtrader.v34(
    data=df_strategy,

    # Left empty, as in the original call.
    # NOTE(review): the run log suggests the range is then taken from `data` - confirm.
    start_date="",
    end_date="",

    initialize=BigTrader_Initialize,
    before_trading_start=BigTrader_Before_Trading,
    handle_tick=BigTrader_Handle_Tick,
    handle_data=BigTrader_Handle_Data,
    handle_trade=BigTrader_Handle_Trade,
    handle_order=BigTrader_Handle_Order,
    after_trading=BigTrader_After_Trading,

    # Use the configured starting capital instead of a duplicated literal.
    # NOTE(review): the small random offset presumably forces a fresh run
    # rather than a cached module result - confirm before removing it.
    capital_base=factor_param_dict["capital_base"] + random.uniform(0, 10),
    frequency="daily",
    product_type="自动",
    rebalance_period_type="交易日",
    rebalance_period_days="1",
    rebalance_period_roll_forward=True,
    backtest_engine_mode="标准模式",
    before_start_days=0,
    volume_limit=1,
    order_price_field_buy="open",
    order_price_field_sell="open",
    # NOTE(review): hard-coded and independent of factor_param_dict["benchmark"].
    benchmark="沪深300指数",

    plot_charts=True,
    debug=False,
    backtest_only=False,
    m_name="BigTrader",
)

[2026-02-22 22:02:46] [info     ] bigtrader.v34 开始运行 ..
[2026-02-22 22:02:46] [info     ] 2020-02-28, 2026-01-30, , , instruments=1121
[2026-02-22 22:02:46] [info     ] bigtrader module V2.2.0
[2026-02-22 22:02:46] [info     ] bigtrader engine v0.1.0.post9+g7a244b6 2026-02-10


[2026-02-22 22:02:55] [info     ] backtest done, raw_perf_ds:dai.DataSource("_dead47d504814852bcf17ce38bc1de35")


成交时间,合约代码,合约名称,买/卖,开/平,数量,成交价,成交金额,平仓盈亏,交易佣金
Loading... (need help?),,,,,,,,,

日期,合约代码,合约名称,持仓均价,收盘价,数量,持仓保证金,期权市值,浮动盈亏,平仓盈亏
Loading... (need help?),,,,,,,,,

时间,级别,内容
Loading... (need help?),,


[2026-02-22 22:02:56] [info     ] bigtrader.v34 运行完成 [10.247s].
