In [1]:
import pandas as pd
import numpy as np

# Load Data

In [None]:
# Location of the source Excel workbook (one sheet per stock).
# A raw string keeps every backslash literal; in a normal string the sequences
# "\ " and "\D" are invalid escapes that Python warns about.
# NOTE(review): the "\ \" segment (a folder whose name is a single space) looks
# like a typo and the path is machine-specific — confirm the real location.
xlsx_path = r"E:\5 Code\2025_cu_qmim\data\ \Dataset.xlsx"

In [3]:
NA_TOKENS = ["#N/A", "#N/A N/A", "N/A", "NA"]


def _read_sheet(book, sheet):
    """Parse one worksheet into a date-indexed numeric frame.

    The returned frame has (stock, metric) MultiIndex columns, with the sheet
    name used as the stock label. Returns None when the sheet reads as empty.
    """
    raw = pd.read_excel(book, sheet_name=sheet, header=0, na_values=NA_TOKENS)
    if raw.empty:
        return None

    # Unparseable dates become NaT and are dropped; duplicated dates keep
    # their first row; rows end up in chronological order.
    raw["Date"] = pd.to_datetime(raw["Date"], errors="coerce")
    tidy = (
        raw.dropna(subset=["Date"])
        .drop_duplicates("Date")
        .sort_values("Date")
        .set_index("Date")
    )

    # Anything that is not a number turns into NaN.
    tidy[tidy.columns] = tidy[tidy.columns].apply(pd.to_numeric, errors="coerce")

    tidy.columns = pd.MultiIndex.from_product([[sheet], tidy.columns], names=["stock", "metric"])
    return tidy


# One frame per non-empty sheet, read while the workbook is open.
with pd.ExcelFile(xlsx_path, engine="openpyxl") as xls:
    parsed = (_read_sheet(xls, sh) for sh in xls.sheet_names)
    frames = [frame for frame in parsed if frame is not None]

# Side-by-side union of all stocks on the date index.
wide = pd.concat(frames, axis=1, join="outer", copy=False, sort=False)

# Put the panel on a business-day grid from the first trading day to the last
# observed date, then order the columns by stock and metric.
start_date = pd.Timestamp("2010-11-08")
business_days = pd.bdate_range(start_date, wide.index.max())
wide = wide.reindex(business_days).sort_index(axis=1, level=[0, 1])

print("Loaded", len(frames), "sheets | Shape:", wide.shape)
wide.head()

Loaded 1012 sheets | Shape: (3916, 30360)


stock,A UN Equity,A UN Equity,A UN Equity,A UN Equity,A UN Equity,A UN Equity,A UN Equity,A UN Equity,A UN Equity,A UN Equity,...,ZTS UN Equity,ZTS UN Equity,ZTS UN Equity,ZTS UN Equity,ZTS UN Equity,ZTS UN Equity,ZTS UN Equity,ZTS UN Equity,ZTS UN Equity,ZTS UN Equity
metric,BETA_ADJ_OVERRIDABLE,CURRENT_EV_TO_T12M_EBITDA,CURRENT_MARKET_CAP_SHARE_CLASS,CUR_RATIO,DVD_PAYOUT_RATIO,EBITDA_MARGIN,EQY_DVD_YLD_12M,FREE_CASH_FLOW_YIELD,GROSS_MARGIN,OPER_MARGIN,...,RETURN_ON_ASSET,TOT_DEBT_TO_EBITDA,TOT_DEBT_TO_TOT_EQY,TURNOVER,VOLATILITY_10D,VOLATILITY_180D,VOLATILITY_30D,VOLATILITY_360D,VOLATILITY_60D,VOLATILITY_90D
2010-11-08,1.0684,17.6845,12531.6808,,,,,7.4271,,,...,,,,,,,,,,
2010-11-09,1.0688,17.3688,12289.2215,,,,,7.5736,,,...,,,,,,,,,,
2010-11-10,1.0686,17.2696,12213.02,,,,,7.6208,,,...,,,,,,,,,,
2010-11-11,1.0682,17.4815,12375.8141,,,,,7.5206,,,...,,,,,,,,,,
2010-11-12,1.0611,17.7657,12594.0274,,,,,7.3903,,,...,,,,,,,,,,


In [12]:
# Keep only the adjusted-beta column of every stock and save it as CSV.
is_beta_column = wide.columns.get_level_values("metric") == "BETA_ADJ_OVERRIDABLE"
df_beta = wide.loc[:, is_beta_column]
df_beta.to_csv("df_beta.csv")


# EDA

In [4]:
# Panel summary: date span, dimensions and overall share of non-missing cells
column_index = wide.columns
n_dates = len(wide.index)
stocks = column_index.get_level_values('stock').unique()
metrics = column_index.get_level_values('metric').unique()

overview = dict(
    date_range=(wide.index.min(), wide.index.max()),
    n_dates=n_dates,
    n_stocks=len(stocks),
    n_metrics=len(metrics),
    # mean over dates per column, then mean over columns
    non_null_ratio=wide.notna().mean().mean(),
)
print(overview)

{'date_range': (Timestamp('2010-11-08 00:00:00'), Timestamp('2025-11-10 00:00:00')), 'n_dates': 3916, 'n_stocks': 1012, 'n_metrics': 30, 'non_null_ratio': np.float64(0.5083378248892082)}


In [5]:
# Share of dates carrying a value, for every (stock, metric) column
non_null_counts = wide.notna().sum()
coverage_col = non_null_counts.div(n_dates).rename("coverage")

# Reshape to one row per stock and one column per metric (values in 0–1)
coverage_matrix = coverage_col.unstack("metric").sort_index()
coverage_matrix

metric,BETA_ADJ_OVERRIDABLE,CURRENT_EV_TO_T12M_EBITDA,CURRENT_MARKET_CAP_SHARE_CLASS,CUR_RATIO,DVD_PAYOUT_RATIO,EBITDA_MARGIN,EQY_DVD_YLD_12M,FREE_CASH_FLOW_YIELD,GROSS_MARGIN,OPER_MARGIN,...,RETURN_ON_ASSET,TOT_DEBT_TO_EBITDA,TOT_DEBT_TO_TOT_EQY,TURNOVER,VOLATILITY_10D,VOLATILITY_180D,VOLATILITY_30D,VOLATILITY_360D,VOLATILITY_60D,VOLATILITY_90D
stock,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A UN Equity,0.963994,0.963994,0.963994,0.011236,0.010725,0.011236,0.874362,0.963994,0.011236,0.011236,...,0.011236,0.011236,0.011236,0.963994,0.963994,0.963994,0.963994,0.963994,0.963994,0.963994
AA UN Equity,0.963994,0.915475,0.963994,0.010470,0.006384,0.010470,0.701992,0.963994,0.010470,0.010470,...,0.010470,0.009704,0.010470,0.963994,0.963994,0.963994,0.963994,0.963994,0.963994,0.963994
AAL UW Equity,0.765322,0.668795,0.765832,0.010470,0.006639,0.010470,0.428754,0.765832,0.000000,0.010470,...,0.010470,0.008938,0.003575,0.765832,0.764556,0.742850,0.762002,0.719867,0.758172,0.754341
AAON UW Equity,0.963994,0.963994,0.963994,0.010470,0.010215,0.010470,0.963994,0.963994,0.010470,0.010470,...,0.010470,0.010470,0.010470,0.963994,0.963994,0.963994,0.963994,0.963994,0.963994,0.963994
AAPL UW Equity,0.963994,0.963994,0.963994,0.000000,0.000000,0.000000,0.851124,0.963994,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.963994,0.963994,0.963994,0.963994,0.963994,0.963994,0.963994
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZG UW Equity,0.918795,0.440245,0.919305,0.010470,0.003320,0.009448,0.000000,0.890194,0.010470,0.010470,...,0.009704,0.004852,0.010470,0.919305,0.918029,0.896323,0.915475,0.873340,0.911645,0.907814
ZION UW Equity,0.963994,0.000000,0.963994,0.000000,0.009704,0.000000,0.963994,0.000000,0.000000,0.010470,...,0.010470,0.000000,0.010470,0.963994,0.963994,0.963994,0.963994,0.963994,0.963994,0.963994
ZM UW Equity,0.421093,0.421859,0.421604,0.005363,0.005618,0.005107,0.000000,0.421604,0.006639,0.006639,...,0.004341,0.005107,0.005363,0.421604,0.420327,0.398621,0.417773,0.375638,0.413943,0.410112
ZS UW Equity,0.491062,0.000000,0.491573,0.006384,0.000255,0.005107,0.000000,0.435393,0.007150,0.007150,...,0.005618,0.000000,0.006384,0.491573,0.490296,0.468590,0.487743,0.445608,0.483912,0.480082


In [6]:
# Per stock, which metrics are missing most often?
# missing share = 1 - coverage, on the same stocks x metrics layout
missing_share = 1 - coverage_matrix

# Ten most-missing metrics for a single example stock
example_stock = "A UN Equity"
(
    missing_share.loc[example_stock]
    .sort_values(ascending=False)
    .head(10)
)

metric
DVD_PAYOUT_RATIO       0.989275
CUR_RATIO              0.988764
EBITDA_MARGIN          0.988764
PROF_MARGIN            0.988764
GROSS_MARGIN           0.988764
OPER_MARGIN            0.988764
TOT_DEBT_TO_TOT_EQY    0.988764
RETURN_COM_EQY         0.988764
TOT_DEBT_TO_EBITDA     0.988764
RETURN_ON_ASSET        0.988764
Name: A UN Equity, dtype: float64

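The ranking above shows that several features are sparsely populated: the fundamental metrics are missing on roughly 99% of dates, so they need treatment before use. We proceed with a forward fill, carrying each metric's most recent value forward until a newer one appears.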

In [7]:
# Forward-fill every column along the date index: each gap takes the most
# recent earlier value. Gaps before a column's first observation stay NaN.
wide = wide.ffill()

In [8]:
# Missingness again, now on the forward-filled panel
filled_counts = wide.notna().sum()
coverage_col = filled_counts.div(n_dates).rename("coverage")
coverage_matrix = coverage_col.unstack("metric").sort_index()
missing_share = 1 - coverage_matrix

# Full ranking for the example stock, most-missing metric first
example_stock = "A UN Equity"
missing_share.loc[example_stock].sort_values(ascending=False)

metric
EQY_DVD_YLD_12M                   0.092952
DVD_PAYOUT_RATIO                  0.015322
EBITDA_MARGIN                     0.015322
OPER_MARGIN                       0.015322
GROSS_MARGIN                      0.015322
CUR_RATIO                         0.015322
PROF_MARGIN                       0.015322
RETURN_COM_EQY                    0.015322
TOT_DEBT_TO_EBITDA                0.015322
RETURN_ON_ASSET                   0.015322
TOT_DEBT_TO_TOT_EQY               0.015322
CURRENT_MARKET_CAP_SHARE_CLASS    0.000000
BETA_ADJ_OVERRIDABLE              0.000000
CURRENT_EV_TO_T12M_EBITDA         0.000000
PX_LAST                           0.000000
PX_HIGH                           0.000000
PE_RATIO                          0.000000
FREE_CASH_FLOW_YIELD              0.000000
PX_TO_SALES_RATIO                 0.000000
PX_TO_BOOK_RATIO                  0.000000
PX_OPEN                           0.000000
PX_LOW                            0.000000
PX_VOLUME                         0.000000
TURN

Start date is 2010-11-08.

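Reason: most fundamental metrics (e.g. Return on Asset) are released quarterly, and their earliest available values appear around December 31, 2010. Starting from the first trading day of 2011 would ensure all features have valid coverage and would remove the early period where fundamentals are mostly missing. Note that the filter in the next cell keeps data from 2010-11-08 onward, so the rows before the first quarterly release still show the fundamentals as missing; the start date stated above and this rationale should be reconciled.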

In [9]:
# Keep only rows on or after the panel start date. Reuses `start_date` from the
# loading cell rather than repeating the date literal, so the two cannot drift apart.
wide = wide.loc[wide.index >= start_date]

In [10]:
# Display the panel after the start-date filter (the notebook shows a truncated view).
wide

stock,A UN Equity,A UN Equity,A UN Equity,A UN Equity,A UN Equity,A UN Equity,A UN Equity,A UN Equity,A UN Equity,A UN Equity,...,ZTS UN Equity,ZTS UN Equity,ZTS UN Equity,ZTS UN Equity,ZTS UN Equity,ZTS UN Equity,ZTS UN Equity,ZTS UN Equity,ZTS UN Equity,ZTS UN Equity
metric,BETA_ADJ_OVERRIDABLE,CURRENT_EV_TO_T12M_EBITDA,CURRENT_MARKET_CAP_SHARE_CLASS,CUR_RATIO,DVD_PAYOUT_RATIO,EBITDA_MARGIN,EQY_DVD_YLD_12M,FREE_CASH_FLOW_YIELD,GROSS_MARGIN,OPER_MARGIN,...,RETURN_ON_ASSET,TOT_DEBT_TO_EBITDA,TOT_DEBT_TO_TOT_EQY,TURNOVER,VOLATILITY_10D,VOLATILITY_180D,VOLATILITY_30D,VOLATILITY_360D,VOLATILITY_60D,VOLATILITY_90D
2010-11-08,1.0684,17.6845,12531.6808,,,,,7.4271,,,...,,,,,,,,,,
2010-11-09,1.0688,17.3688,12289.2215,,,,,7.5736,,,...,,,,,,,,,,
2010-11-10,1.0686,17.2696,12213.0200,,,,,7.6208,,,...,,,,,,,,,,
2010-11-11,1.0682,17.4815,12375.8141,,,,,7.5206,,,...,,,,,,,,,,
2010-11-12,1.0611,17.7657,12594.0274,,,,,7.3903,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-11-04,0.9994,25.1288,41413.7424,2.2479,20.9619,25.4862,0.6791,2.6123,51.0932,20.7135,...,17.9631,1.8233,134.7351,295872400.0,79.571,31.493,47.084,27.590,34.803,31.386
2025-11-05,0.9994,25.2418,41609.3577,2.2479,20.9619,25.4862,0.6759,2.6000,51.0932,20.7135,...,17.9631,1.8233,134.7351,191616900.0,79.452,31.708,47.827,27.721,35.051,31.544
2025-11-06,0.9968,25.3598,41813.4780,2.2479,20.9619,25.4862,0.6726,2.5873,51.0932,20.7135,...,17.9631,1.8233,134.7351,163948600.0,79.144,31.618,47.341,27.687,35.018,31.551
2025-11-07,0.9961,25.2369,41600.8527,2.2479,20.9619,25.4862,0.6760,2.6005,51.0932,20.7135,...,17.9631,1.8233,134.7351,178774500.0,78.498,31.402,47.466,27.692,35.003,31.581


Based on discussion, we align the dataset to actual trading days to ensure consistency and comparability across all stocks. Non-trading days (weekends, holidays) create artificial gaps that distort time-series patterns and cause invalid forward fills. Restricting to trading days guarantees that every row represents a true market session and that fills only occur across real overnight intervals. Starting from 2011-01-03, the first trading day of 2011, ensures most fundamentals are available, providing a complete, synchronized panel for K-means analysis.

In [11]:
import pandas_market_calendars as mcal

# use NYSE calendar
# NOTE: `mcal` is the third-party pandas_market_calendars package imported at the
# top of this cell; the cell cannot run unless that package is installed.
nyse = mcal.get_calendar("XNYS")
# Session schedule from the first to the last date currently in `wide`
sched = nyse.schedule(start_date=wide.index.min(), end_date=wide.index.max())
trading_days = mcal.date_range(sched, frequency="1D") # one timestamp per session; presumably tz-aware UTC close times — TODO confirm
trading_days = (trading_days.tz_convert("America/New_York").normalize().tz_localize(None)) # shift to New York time, truncate to midnight, then drop the timezone

# Keep only trading days
# (reindex keeps rows whose date is in trading_days; any trading day absent from
# `wide` would appear as an all-NaN row)
wide = wide.reindex(trading_days)

ModuleNotFoundError: No module named 'pandas_market_calendars'

In [None]:
# Sanity check: pull one (stock, metric) series out of the panel
example_key = ('A UN Equity', 'PX_LAST')
a_un_px_last = wide[example_key]

# Preview
print(a_un_px_last.head())

2010-11-08    22.9860
2010-11-09    22.5413
2010-11-10    22.4015
2010-11-11    22.7001
2010-11-12    23.1004
Name: (A UN Equity, PX_LAST), dtype: float64


We align each stock’s features to the start of its tradable history. Concretely, for every stock we find the first date where PX_LAST is non-NA (t0) and set all columns for that stock to NA on rows before t0. If a stock’s PX_LAST is missing for the entire span, we drop that stock. This prevents look-ahead/availability bias where fundamentals (often reported earlier or sporadically) would appear before any valid price exists, ensures all signals used for modeling are contemporaneously available with prices, and keeps the panel consistent without affecting other stocks’ data on those same dates.

In [None]:
stocks = wide.columns.get_level_values('stock').unique()

# First date with a valid PX_LAST for each stock (None when the price never appears)
first_price_date = {s: wide[(s, 'PX_LAST')].first_valid_index() for s in stocks}

# Stocks with no price at all are removed entirely
to_drop = [s for s, t0 in first_price_date.items() if t0 is None]

for s, t0 in first_price_date.items():
    if t0 is None:
        continue
    # Blank out every metric of this stock on rows before its first price
    pre_listing = wide.index < t0
    if pre_listing.any():
        wide.loc[pre_listing, s] = np.nan

# delete stocks that miss price value entirely
if to_drop:
    wide = wide.drop(columns=to_drop, level='stock')
    print(f"Dropped {len(to_drop)} stocks with all-NA PX_LAST:", to_drop[:10])

In [None]:
# Display the panel after aligning each stock to its first valid price (truncated view).
wide

stock,A UN Equity,A UN Equity,A UN Equity,A UN Equity,A UN Equity,A UN Equity,A UN Equity,A UN Equity,A UN Equity,A UN Equity,...,ZTS UN Equity,ZTS UN Equity,ZTS UN Equity,ZTS UN Equity,ZTS UN Equity,ZTS UN Equity,ZTS UN Equity,ZTS UN Equity,ZTS UN Equity,ZTS UN Equity
metric,BETA_ADJ_OVERRIDABLE,CURRENT_EV_TO_T12M_EBITDA,CURRENT_MARKET_CAP_SHARE_CLASS,CUR_RATIO,DVD_PAYOUT_RATIO,EBITDA_MARGIN,EQY_DVD_YLD_12M,FREE_CASH_FLOW_YIELD,GROSS_MARGIN,OPER_MARGIN,...,RETURN_ON_ASSET,TOT_DEBT_TO_EBITDA,TOT_DEBT_TO_TOT_EQY,TURNOVER,VOLATILITY_10D,VOLATILITY_180D,VOLATILITY_30D,VOLATILITY_360D,VOLATILITY_60D,VOLATILITY_90D
2010-11-08,1.0684,17.6845,12531.6808,,,,,7.4271,,,...,,,,,,,,,,
2010-11-09,1.0688,17.3688,12289.2215,,,,,7.5736,,,...,,,,,,,,,,
2010-11-10,1.0686,17.2696,12213.0200,,,,,7.6208,,,...,,,,,,,,,,
2010-11-11,1.0682,17.4815,12375.8141,,,,,7.5206,,,...,,,,,,,,,,
2010-11-12,1.0611,17.7657,12594.0274,,,,,7.3903,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-11-04,0.9994,25.1288,41413.7424,2.2479,20.9619,25.4862,0.6791,2.6123,51.0932,20.7135,...,17.9631,1.8233,134.7351,295872400.0,79.571,31.493,47.084,27.590,34.803,31.386
2025-11-05,0.9994,25.2418,41609.3577,2.2479,20.9619,25.4862,0.6759,2.6000,51.0932,20.7135,...,17.9631,1.8233,134.7351,191616900.0,79.452,31.708,47.827,27.721,35.051,31.544
2025-11-06,0.9968,25.3598,41813.4780,2.2479,20.9619,25.4862,0.6726,2.5873,51.0932,20.7135,...,17.9631,1.8233,134.7351,163948600.0,79.144,31.618,47.341,27.687,35.018,31.551
2025-11-07,0.9961,25.2369,41600.8527,2.2479,20.9619,25.4862,0.6760,2.6005,51.0932,20.7135,...,17.9631,1.8233,134.7351,178774500.0,78.498,31.402,47.466,27.692,35.003,31.581


In [None]:
import numpy as np
import pandas as pd

# 1. Build return / skewness / kurtosis features from PX_LAST

def _label_metric(panel, metric):
    """Give a date x stock panel (stock, metric) MultiIndex columns.

    The panel is relabelled in place and returned, so the call can be used
    inline on a freshly computed temporary.
    """
    panel.columns = pd.MultiIndex.from_arrays(
        [panel.columns, [metric] * panel.shape[1]],
        names=['stock', 'metric']
    )
    return panel


# Price matrix: index = date, columns = stock
px = wide.xs('PX_LAST', axis=1, level='metric')   # DataFrame (T x N)

daily_ret = px.pct_change()

windows = [10, 30, 90, 180, 360]
ret_feats = []

for w in windows:
    # Cumulative return over w rows: P_t / P_{t-w} - 1
    ret_feats.append(_label_metric(px / px.shift(w) - 1, f'RET_{w}D'))

    # Skewness and kurtosis of the daily returns inside a full w-row window
    # (min_periods=w: no value until the window is complete)
    rolling_ret = daily_ret.rolling(window=w, min_periods=w)
    ret_feats.append(_label_metric(rolling_ret.skew(), f'RET_SKEW_{w}D'))
    ret_feats.append(_label_metric(rolling_ret.kurt(), f'RET_KURT_{w}D'))

# Stitch all return / skew / kurt features together and add them to wide
ret_block = pd.concat(ret_feats, axis=1)

# Re-running this cell must not duplicate the feature columns: drop any copies
# left in `wide` by a previous run before appending the fresh block.
stale = wide.columns.isin(ret_block.columns)
if stale.any():
    wide = wide.loc[:, ~stale]

wide = pd.concat([wide, ret_block], axis=1).sort_index(axis=1)

# 2. Winsorize and cross-sectionally z-score the non-price factors

# Raw price fields: skipped by the standardization loop
raw_price_metrics = ['PX_OPEN', 'PX_HIGH', 'PX_LOW', 'PX_LAST']

# Valuation multiples: the standardization loop flips their sign
valuation_metrics = ['PE_RATIO', 'PX_TO_BOOK_RATIO', 'PX_TO_SALES_RATIO', 'CURRENT_EV_TO_T12M_EBITDA']

# Size / liquidity fields: the standardization loop applies log1p to them
log_metrics = ['CURRENT_MARKET_CAP_SHARE_CLASS', 'PX_VOLUME', 'TURNOVER']

def winsorize_row(row, lower=0.01, upper=0.99):
    """Clip a Series to its own [lower, upper] quantile range.

    Parameters
    ----------
    row : pd.Series
        Values to winsorize (one cross-section when used with ``apply(axis=1)``).
    lower, upper : float
        Quantile levels that define the clipping bounds.

    Returns
    -------
    pd.Series
        The clipped values; missing entries stay missing. A row with no valid
        value is returned unchanged.
    """
    if not row.notna().any():
        return row
    bounds = row.quantile([lower, upper])
    floor, ceiling = bounds.iloc[0], bounds.iloc[1]
    return row.clip(lower=floor, upper=ceiling)

def _cross_sectional_zscore(panel):
    """Winsorize each date's row with winsorize_row, then z-score it across stocks.

    Rows whose standard deviation is exactly zero yield NaN instead of a
    division by zero.
    """
    clipped = panel.apply(winsorize_row, axis=1)
    center = clipped.mean(axis=1)
    spread = clipped.std(axis=1).replace(0, np.nan)
    return clipped.sub(center, axis=0).div(spread, axis=0)


all_metrics = wide.columns.get_level_values('metric').unique()

# Raw price fields are not treated as factors
factor_metrics = [m for m in all_metrics if m not in raw_price_metrics]

# Collect one standardized panel per metric, then concatenate once at the end
blocks = []

for m in factor_metrics:
    # Cross-section of this metric: index = date, columns = stock
    X = wide.xs(m, axis=1, level='metric')

    # Nothing to standardize when the metric has no value at all
    if not X.notna().any().any():
        continue

    if m in valuation_metrics:
        X = -X

    if m in log_metrics:
        X = np.log1p(X)

    X_z = _cross_sectional_zscore(X)

    # Restore (stock, metric) MultiIndex columns
    X_z.columns = pd.MultiIndex.from_arrays(
        [X_z.columns, [m] * X_z.shape[1]],
        names=['stock', 'metric']
    )

    blocks.append(X_z)

# Single concatenation; the columns stay a MultiIndex
factors_std = pd.concat(blocks, axis=1)


print("Standardized factor shape:", factors_std.shape)
print(factors_std.columns.nlevels, factors_std.columns.names)


Standardized factor shape: (3775, 41492)
2 ['stock', 'metric']


In [None]:
# Order the columns by stock first, then by metric within each stock
factors_std = factors_std.sort_index(axis=1, level=['stock', 'metric'])

factors_std

stock,A UN Equity,A UN Equity,A UN Equity,A UN Equity,A UN Equity,A UN Equity,A UN Equity,A UN Equity,A UN Equity,A UN Equity,...,ZTS UN Equity,ZTS UN Equity,ZTS UN Equity,ZTS UN Equity,ZTS UN Equity,ZTS UN Equity,ZTS UN Equity,ZTS UN Equity,ZTS UN Equity,ZTS UN Equity
metric,BETA_ADJ_OVERRIDABLE,CURRENT_EV_TO_T12M_EBITDA,CURRENT_MARKET_CAP_SHARE_CLASS,CUR_RATIO,DVD_PAYOUT_RATIO,EBITDA_MARGIN,EQY_DVD_YLD_12M,FREE_CASH_FLOW_YIELD,GROSS_MARGIN,OPER_MARGIN,...,RET_SKEW_90D,TOT_DEBT_TO_EBITDA,TOT_DEBT_TO_TOT_EQY,TURNOVER,VOLATILITY_10D,VOLATILITY_180D,VOLATILITY_30D,VOLATILITY_360D,VOLATILITY_60D,VOLATILITY_90D
2010-11-08,-0.216365,-0.561247,0.599362,,,,,-0.029025,,,...,,,,,,,,,,
2010-11-09,-0.218998,-0.547761,0.591617,,,,,-0.024446,,,...,,,,,,,,,,
2010-11-10,-0.219737,-0.515250,0.584166,,,,,-0.020127,,,...,,,,,,,,,,
2010-11-11,-0.220877,-0.543838,0.595149,,,,,-0.030033,,,...,,,,,,,,,,
2010-11-12,-0.242840,-0.596429,0.617302,,,,,-0.046799,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-11-04,0.044900,0.072625,0.634673,0.165954,-0.262285,0.069275,-0.867377,-0.213230,0.168496,0.196345,...,-2.856820,-0.41734,-0.123093,1.429823,1.876713,-0.524081,0.800082,-0.645229,0.152040,-0.184016
2025-11-05,0.043679,0.072364,0.634538,0.165954,-0.262285,0.069275,-0.863070,-0.212960,0.168496,0.196345,...,-2.678237,-0.41734,-0.123093,1.010969,1.712043,-0.513120,0.770837,-0.636584,0.162988,-0.175971
2025-11-06,0.032101,0.066719,0.645252,0.165954,-0.262285,0.069275,-0.863324,-0.220390,0.168496,0.196345,...,-2.679360,-0.41734,-0.123093,0.880933,1.499851,-0.516976,0.636999,-0.641228,0.115067,-0.190150
2025-11-07,0.029801,0.069530,0.634293,0.165954,-0.262285,0.069275,-0.863413,-0.214970,0.168496,0.196345,...,-2.681477,-0.41734,-0.123093,1.007365,1.362102,-0.535212,0.601032,-0.643376,0.096368,-0.201983


In [None]:
# Distinct factor names in factors_std (the second column level, 'metric')
metrics = factors_std.columns.unique(level='metric')

print("n_metrics:", len(metrics))
for metric_name in metrics:
    print(metric_name)

n_metrics: 41
BETA_ADJ_OVERRIDABLE
CURRENT_EV_TO_T12M_EBITDA
CURRENT_MARKET_CAP_SHARE_CLASS
CUR_RATIO
DVD_PAYOUT_RATIO
EBITDA_MARGIN
EQY_DVD_YLD_12M
FREE_CASH_FLOW_YIELD
GROSS_MARGIN
OPER_MARGIN
PE_RATIO
PROF_MARGIN
PX_TO_BOOK_RATIO
PX_TO_SALES_RATIO
PX_VOLUME
RETURN_COM_EQY
RETURN_ON_ASSET
RET_10D
RET_180D
RET_30D
RET_360D
RET_90D
RET_KURT_10D
RET_KURT_180D
RET_KURT_30D
RET_KURT_360D
RET_KURT_90D
RET_SKEW_10D
RET_SKEW_180D
RET_SKEW_30D
RET_SKEW_360D
RET_SKEW_90D
TOT_DEBT_TO_EBITDA
TOT_DEBT_TO_TOT_EQY
TURNOVER
VOLATILITY_10D
VOLATILITY_180D
VOLATILITY_30D
VOLATILITY_360D
VOLATILITY_60D
VOLATILITY_90D


In [None]:
# Persist the standardized factor panel as a Parquet file (path is relative to the working directory).
factors_std.to_parquet("factors_std.parquet")