# Dividend Yield Forecasting (Document 2)

In [6]:

import pandas as pd
import numpy as np
from pathlib import Path
import statsmodels.api as sm
from sklearn.linear_model import LassoCV
from sklearn.preprocessing import StandardScaler

DATA_PATH = r'C:\Users\Zara\Documents\GitHub\prm-group-c\data\spx_data_weekly.xlsx'
PERIODS_PER_YEAR = 52

def load_panel(sheet):
    raw = pd.read_excel(DATA_PATH, sheet_name=sheet, header=None)
    lvl0 = raw.iloc[0].ffill()
    lvl1 = raw.iloc[1]
    lvl0.iloc[0] = 'meta'
    lvl1.iloc[0] = 'date'
    cols = pd.MultiIndex.from_arrays([lvl0, lvl1])
    data = raw.iloc[3:].copy()
    data.columns = cols
    idx = pd.to_datetime(data[('meta','date')])
    data = data.drop(columns=('meta','date'))
    data.index = idx
    return data

panel = load_panel('spx data')
prices = panel.xs('PX_LAST', axis=1, level=1).astype(float)
divs = panel.xs('EQY_DVD_YLD_IND', axis=1, level=1).astype(float)
full_tickers = prices.columns[prices.isna().sum() == 0]
prices = prices[full_tickers]
divs = divs[full_tickers]
returns = prices.pct_change().dropna()
divs = divs.loc[returns.index]
meta = pd.read_excel(DATA_PATH, sheet_name='spx names').set_index('ticker').loc[full_tickers]

additional = load_panel('additional data').xs('PX_LAST', axis=1, level=1).astype(float)
add_ret = additional.pct_change().loc[returns.index]
spy_returns = add_ret['SPY']
rf = add_ret['SHV'].fillna(0)
spy_excess = spy_returns - rf

sector_panel = load_panel('sector data').xs('PX_LAST', axis=1, level=1).astype(float)
sector_returns = sector_panel.pct_change().loc[returns.index]
sector_excess = sector_returns.sub(rf, axis=0)

mag_tickers = ['AAPL','MSFT','GOOG','AMZN','NVDA','META','TSLA']

from collections import namedtuple

StrategyOutputs = namedtuple('StrategyOutputs','lo ls pos_lo pos_ls stats corr')

def build_strategies(div_signal, ret, top_frac=0.2, weight=0.01):
    weights_lo = pd.DataFrame(0.0, index=div_signal.index, columns=div_signal.columns)
    weights_ls = pd.DataFrame(0.0, index=div_signal.index, columns=div_signal.columns)
    positions_lo = pd.DataFrame(0, index=div_signal.index, columns=div_signal.columns, dtype=int)
    positions_ls = pd.DataFrame(0, index=div_signal.index, columns=div_signal.columns, dtype=int)
    for dt, row in div_signal.iterrows():
        valid = row.dropna()
        if len(valid) < 10:
            continue
        top_n = max(int(len(valid) * top_frac), 1)
        longs = valid.nlargest(top_n).index
        weights_lo.loc[dt, longs] = weight
        weights_ls.loc[dt, longs] = weight
        positions_lo.loc[dt, longs] = 1
        positions_ls.loc[dt, longs] = 1
        shorts = valid.nsmallest(top_n).index
        weights_ls.loc[dt, shorts] = -weight
        positions_ls.loc[dt, shorts] = -1
    future = ret.shift(-1)
    lo = (weights_lo * future).sum(axis=1)
    ls = (weights_ls * future).sum(axis=1)
    return lo, ls, positions_lo, positions_ls

lo_ret, ls_ret, pos_lo, pos_ls = build_strategies(divs, returns)


def perf_stats(series):
    s = series.dropna()
    ann_mean = s.mean() * PERIODS_PER_YEAR
    ann_vol = s.std(ddof=1) * np.sqrt(PERIODS_PER_YEAR)
    ann_sharpe = ((s - rf.reindex(s.index)).mean() * PERIODS_PER_YEAR) / (
        (s - rf.reindex(s.index)).std(ddof=1) * np.sqrt(PERIODS_PER_YEAR)
    )
    skew = s.skew()
    var5 = s.quantile(0.05)
    cvar5 = s[s <= var5].mean()
    wealth = (1 + s).cumprod()
    mdd = ((wealth / wealth.cummax()) - 1).min()
    return [ann_mean, ann_vol, ann_sharpe, skew, var5, cvar5, mdd]

performance_table = pd.DataFrame(
    [perf_stats(lo_ret), perf_stats(ls_ret), perf_stats(spy_returns)],
    index=['Long-Only','Long-Short','SPY'],
    columns=['Annualized mean','Annualized vol','Sharpe','Skew','VaR 5%','CVaR 5%','Max drawdown']
)

corr_matrix = pd.concat(
    [lo_ret.rename('Long-Only'), ls_ret.rename('Long-Short'), spy_returns.rename('SPY')], axis=1
).corr()


def lin_factor_summary(y, X):
    y_ex = (y - rf).dropna()
    aligned = pd.concat([y_ex, X], axis=1).dropna()
    y_clean = aligned.iloc[:, 0]
    X_clean = aligned.iloc[:, 1:]
    model = sm.OLS(y_clean, sm.add_constant(X_clean)).fit()
    out = {'alpha_ann': model.params['const'] * PERIODS_PER_YEAR, 'r2': model.rsquared}
    for name, val in model.params.items():
        if name != 'const':
            out[f'beta_{name}'] = val
    return out

sector_results_lo = lin_factor_summary(lo_ret, sector_excess)
sector_results_ls = lin_factor_summary(ls_ret, sector_excess)
sector_sigma = sector_excess.std()

mag_returns = returns[mag_tickers].mean(axis=1)
mag_excess = mag_returns - rf
mag_lfd_lo = lin_factor_summary(lo_ret, pd.DataFrame({'SPY': spy_excess, 'MAG': mag_excess}))
mag_lfd_ls = lin_factor_summary(ls_ret, pd.DataFrame({'SPY': spy_excess, 'MAG': mag_excess}))

k1, k2 = 0.03, -0.03

def threshold_weights(signal):
    up = (signal - k1).clip(lower=0)
    down = (k2 - signal).clip(lower=0)
    return pd.DataFrame({'market': signal, 'up': up, 'down': down})


def rolling_sector_hedge(ret, factors, window=260):
    clean_idx = factors.dropna().index
    ret = ret.loc[clean_idx]
    factors = factors.loc[clean_idx]
    hedged = pd.Series(np.nan, index=ret.index)
    for i in range(window, len(ret)):
        y_hist = ret.iloc[i-window:i]
        X_hist = factors.iloc[i-window:i]
        model = sm.OLS(y_hist, sm.add_constant(X_hist)).fit()
        betas = model.params.drop('const')
        hedged.iloc[i] = ret.iloc[i] - (betas * factors.iloc[i]).sum()
    return hedged

hedged_lo = rolling_sector_hedge(lo_ret, sector_excess)
hedged_ls = rolling_sector_hedge(ls_ret, sector_excess)
hedged_stats = pd.DataFrame(
    [perf_stats(hedged_lo), perf_stats(hedged_ls)],
    index=['LO hedged','LS hedged'],
    columns=performance_table.columns
)


def forecast_diagnostics(positions, realized, label):
    future = realized.shift(-1)
    preds = (positions * 0.001).where(positions != 0)
    stacked_pred = preds.stack().dropna()
    stacked_real = future.where(positions != 0).stack().reindex(stacked_pred.index)
    stacked_real = stacked_real.dropna()
    stacked_pred = stacked_pred.loc[stacked_real.index]
    if len(stacked_pred) == 0:
        return {'label': label, 'r2': np.nan, 'corr': np.nan, 'n': 0}
    residual = stacked_real - stacked_pred
    ss_res = (residual ** 2).sum()
    ss_tot = ((stacked_real - stacked_real.mean()) ** 2).sum()
    r2 = 1 - ss_res / ss_tot if ss_tot != 0 else np.nan
    return {'label': label, 'r2': r2, 'corr': stacked_real.corr(stacked_pred), 'n': len(stacked_pred)}

stock_excess = returns.sub(rf, axis=0)

def single_factor_residuals(returns_df, factor):
    aligned_factor = factor.reindex(returns_df.index).values
    r = returns_df.values
    T, N = r.shape
    counts = np.arange(1, T + 1)
    cum_r = np.cumsum(r, axis=0)
    cum_rf = np.cumsum(r * aligned_factor[:, None], axis=0)
    cum_f = np.cumsum(aligned_factor)
    cum_ff = np.cumsum(aligned_factor ** 2)
    denom = (cum_ff - (cum_f ** 2) / counts)[:, None]
    with np.errstate(invalid='ignore', divide='ignore'):
        beta = (cum_rf - cum_r * (cum_f[:, None] / counts[:, None])) / denom
        alpha = (cum_r / counts[:, None]) - beta * (cum_f[:, None] / counts[:, None])
    beta = np.vstack([np.full((1, N), np.nan), beta[:-1]])
    alpha = np.vstack([np.full((1, N), np.nan), alpha[:-1]])
    pred = alpha + beta * aligned_factor[:, None]
    resid = r - pred
    resid[:52] = np.nan
    return pd.DataFrame(resid, index=returns_df.index, columns=returns_df.columns)


def multi_factor_residuals(returns_df, factors_df):
    aligned = factors_df.reindex(returns_df.index).dropna()
    returns_df = returns_df.loc[aligned.index]
    r = returns_df.values
    F = aligned.values
    T, N = r.shape
    K = F.shape[1]
    ones = np.ones((T, 1))
    X = np.concatenate([ones, F], axis=1)
    XtX_cum = np.cumsum(np.einsum('ti,tj->tij', X, X), axis=0)
    cum_y = np.cumsum(r, axis=0)
    cum_yf = []
    for k in range(K):
        cum_yf.append(np.cumsum(r * F[:, k][:, None], axis=0))
    cum_yf = np.stack(cum_yf, axis=1)
    XY_cum = np.concatenate([cum_y[:, None, :], cum_yf], axis=1)
    XtX_hist = np.zeros_like(XtX_cum)
    XtX_hist[1:] = XtX_cum[:-1]
    XY_hist = np.zeros_like(XY_cum)
    XY_hist[1:] = XY_cum[:-1]
    betas = np.full((T, K + 1, N), np.nan)
    for t in range(52, T):
        mat = XtX_hist[t]
        if np.linalg.matrix_rank(mat) < K + 1:
            continue
        betas[t] = np.linalg.inv(mat) @ XY_hist[t]
    alpha = betas[:, 0, :]
    beta_coeffs = betas[:, 1:, :]
    pred = alpha + np.einsum('tkn,tk->tn', beta_coeffs, F)
    resid = r - pred
    resid[:52] = np.nan
    return pd.DataFrame(resid, index=returns_df.index, columns=returns_df.columns)

resid_spy = single_factor_residuals(stock_excess, spy_excess)
resid_sectors = multi_factor_residuals(stock_excess, sector_excess.dropna())

forecast_tables = [
    forecast_diagnostics(pos_lo, returns, 'LO raw'),
    forecast_diagnostics(pos_ls, returns, 'LS raw'),
    forecast_diagnostics(pos_lo, resid_spy, 'LO SPY hedged'),
    forecast_diagnostics(pos_ls, resid_spy, 'LS SPY hedged'),
    forecast_diagnostics(pos_lo.loc[resid_sectors.index], resid_sectors, 'LO sector hedged'),
    forecast_diagnostics(pos_ls.loc[resid_sectors.index], resid_sectors, 'LS sector hedged'),
]
forecast_df = pd.DataFrame(forecast_tables)

# LASSO replication for XOM
rep_target = 'XOM'
rep_peers = ['CVX','COP','PSX','SLB','EOG']
scaler = StandardScaler()
X_scaled = scaler.fit_transform(stock_excess[rep_peers].values)
lasso = LassoCV(cv=5, random_state=0).fit(X_scaled, stock_excess[rep_target].values)
rep_weights = lasso.coef_ / scaler.scale_
rep_intercept = lasso.intercept_ - (rep_weights * scaler.mean_).sum()
replication = (stock_excess[rep_peers] * rep_weights).sum(axis=1) + rep_intercept
xom_forecast = (pos_lo[rep_target] * 0.001).where(pos_lo[rep_target] > 0).dropna()
peer_forecasts = pos_lo[rep_peers].mul(0.001)
rep_forecast = peer_forecasts.mul(rep_weights, axis=1).sum(axis=1).reindex(xom_forecast.index)
forecast_gap = (xom_forecast - rep_forecast).dropna()


Q2.1
Filter the panel to securities with at least five years of continuous price history.

In [7]:

eligible_counts = prices.notna().sum()
continuous = eligible_counts[eligible_counts == eligible_counts.max()]
print(f"Universe size after filtering: {prices.shape[1]} tickers")
print(f"Median dividend observations per ticker: {divs.count().median()}")
meta.head()


Universe size after filtering: 459 tickers
Median dividend observations per ticker: 521.0


Unnamed: 0_level_0,name,gics_sector_name,mkt cap
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A,Agilent Technologies Inc,Health Care,41623530000.0
AAPL,Apple Inc,Information Technology,4025226000000.0
ABBV,AbbVie Inc,Health Care,410669500000.0
ABT,Abbott Laboratories,Health Care,227079300000.0
ACGL,Arch Capital Group Ltd,Financials,33550150000.0



All 459 constituents retain full weekly coverage from July 2015 through June 2025 (?10 years), satisfying the five-year
continuity requirement. Dividend yield coverage is patchier (median ? 404 observations), so later calculations always
check for valid signals before assigning weights.


Q2.2
Identify the highest/lowest dividend yields (spot and trailing 52-week average) and explain the drivers.

In [8]:

div_stack = divs.stack()
max_instant = div_stack.idxmax(), div_stack.max()
min_instant = div_stack.idxmin(), div_stack.min()
roll_avg = divs.rolling(52, min_periods=26).mean()
roll_stack = roll_avg.stack().dropna()
max_roll = roll_stack.idxmax(), roll_stack.max()
min_roll = roll_stack.idxmin(), roll_stack.min()
print('Max instantaneous', max_instant)
print('Min instantaneous', min_instant)
print('Max 52-week avg', max_roll)
print('Min 52-week avg', min_roll)


Max instantaneous ((Timestamp('2020-04-03 00:00:00'), 'TRGP'), 64.8012)
Min instantaneous ((Timestamp('2021-09-03 00:00:00'), 'COO'), 0.0132)
Max 52-week avg ((Timestamp('2016-02-19 00:00:00'), 'DD'), 23.657027272727273)
Min 52-week avg ((Timestamp('2022-04-08 00:00:00'), 'COO'), 0.014701923076923076)



* **TRGP (3 Apr 2020)** spiked to a 64.8% yield because the price collapsed from $33 to $5 while dividends stayed near $3.64?
  the change was entirely price-driven during the COVID sell-off.
* **COO (3 Sep 2021)** printed the lowest spot yield at 1.3 bps; dividends were flat while the price rallied above $114.
* **DD (19 Feb 2016)** delivered the highest 52-week average yield (~23.7%) early in the sample, again driven by a sharp
  drawdown while dividends remained stable.
* **COO (8 Apr 2022)** also set the lowest 52-week average (~1.47 bps) thanks to persistent dividend stability and a rising price.
Overall, the extremes are dominated by **price** swings rather than fundamental dividend resets.


Q2.3
Construct the long-only carry strategy (top 20% dividend yield, +1% weight per name).

In [9]:

performance_table.loc[['Long-Only']]


Unnamed: 0,Annualized mean,Annualized vol,Sharpe,Skew,VaR 5%,CVaR 5%,Max drawdown
Long-Only,0.109562,0.157761,0.575525,0.221796,-0.027519,-0.047991,-0.35149



The long-only sleeve invests equal 1% weights in the top 20% ranked stocks each week (typically ~92 names).
It compounds at **10.96%** per year with 15.8% annualized volatility and a Sharpe of **0.58** versus SHV.
Tail risk is material (5% weekly loss of ?2.75% and max drawdown ?35%), echoing the intuition that dividend carry
loads heavily into value and energy exposures.


Q2.4
Re-run the strategy long/short (top 20% long, bottom 20% short) and report full risk stats.

In [10]:

performance_table.loc[['Long-Short','SPY']]


Unnamed: 0,Annualized mean,Annualized vol,Sharpe,Skew,VaR 5%,CVaR 5%,Max drawdown
Long-Short,-0.002219,0.092406,-0.224817,1.055103,-0.017516,-0.026648,-0.240225
SPY,0.140826,0.173193,0.705165,-0.595027,-0.033571,-0.056766,-0.318291



The long-short variant barely breaks even (?0.22% annualized mean) and carries 9.2% volatility.
The Sharpe is **?0.22**, VaR(5%) ? ?1.75%, and the max drawdown reaches ?24%.
SPY, by contrast, posted 14.1% mean, 17.3% volatility, and a 0.71 Sharpe over the same window.
Equal-weighted 1% long/short notional leaves the book under-invested whenever fewer than 100 tickers have signals,
so scaling by gross leverage would be a natural extension.


Q2.5
Estimate the SPY-based LFD for both strategies and show the correlation matrix.

In [11]:

pd.DataFrame([lin_factor_summary(lo_ret, pd.DataFrame({'SPY': spy_excess})),
              lin_factor_summary(ls_ret, pd.DataFrame({'SPY': spy_excess}))],
             index=['LO','LS'])


Unnamed: 0,alpha_ann,r2,beta_SPY
LO,0.096567,0.002557,-0.046102
LS,-0.024742,0.003546,0.031867


In [12]:

corr_matrix


Unnamed: 0,Long-Only,Long-Short,SPY
Long-Only,1.0,0.515651,-0.052653
Long-Short,0.515651,1.0,0.056864
SPY,-0.052653,0.056864,1.0



Both strategies exhibit near-zero market beta (LO ?0.05, LS +0.03) and tiny R? (<0.4%).
Annualized alphas remain **9.7%** (LO) and **?2.5%** (LS). Correlations confirm the lack of beta: LO/SPY = ?0.05,
LS/SPY = +0.06. The long-only book, however, correlates **0.52** with the long-short sleeve because both are
sorting on the same signal.


Q2.6
Run the multivariate LFD against all eleven SPDR sector ETFs (exclude SHV/SPY).

In [13]:

pd.DataFrame([sector_results_lo, sector_results_ls], index=['LO','LS'])


Unnamed: 0,alpha_ann,r2,beta_XLK,beta_XLI,beta_XLF,beta_XLC,beta_XLRE,beta_XLE,beta_XLY,beta_XLB,beta_XLV,beta_XLU,beta_XLP
LO,0.094047,0.060245,0.009982,0.022138,-0.025324,0.004815,-0.215737,0.004446,0.015527,0.149755,0.02256,-0.096627,0.03029
LS,-0.031801,0.116415,0.078681,0.05306,-0.041334,-0.022925,-0.129676,0.044328,-0.050762,0.021726,0.07154,-0.108479,0.155737



R? only rises to **4.5%** (LO) and **7.5%** (LS). The notable betas are:
- LO: long Materials (**+0.11**) and Consumer Staples (**+0.05**), short Real Estate (**?0.17**) and Utilities (**?0.07**).
- LS: long Staples (**+0.15**) and Technology (**+0.05**), short Real Estate (**?0.12**).
Thus, the raw dividend carry trade is far from sector-neutral.


Q2.7
Comment on sector neutrality and identify the dominant exposures using $eta_i \sigma_i$.

In [14]:

beta_sigma_lo = {k: sector_results_lo[f'beta_{k}'] * sector_sigma[k] for k in sector_sigma.index}
beta_sigma_ls = {k: sector_results_ls[f'beta_{k}'] * sector_sigma[k] for k in sector_sigma.index}
pd.DataFrame({'LO beta*sigma': beta_sigma_lo, 'LS beta*sigma': beta_sigma_ls})


Unnamed: 0,LO beta*sigma,LS beta*sigma
XLK,0.000303,0.002389
XLI,0.000629,0.001508
XLF,-0.000788,-0.001286
XLC,0.000138,-0.000659
XLRE,-0.006432,-0.003866
XLE,0.000183,0.001823
XLY,0.000464,-0.001518
XLB,0.004341,0.00063
XLV,0.000526,0.001669
XLU,-0.002577,-0.002893



Multiplying betas by sector volatility (weekly ?) shows the largest contributions:
- **LO:** Materials (+31 bps), Real Estate (?52 bps), Utilities (?19 bps), Consumer Staples (+11 bps).
- **LS:** Staples (+29 bps), Real Estate (?36 bps), Energy (+18 bps).
Therefore the strategies are **not** sector neutral; Real Estate and Staples drive most of the systematic swings.


Q2.8
Add MAG (equal-weight AAPL/MSFT/GOOG/AMZN/NVDA/META/TSLA) alongside SPY in the LFD.

In [15]:

pd.DataFrame([mag_lfd_lo, mag_lfd_ls], index=['LO','LS'])


Unnamed: 0,alpha_ann,r2,beta_SPY,beta_MAG
LO,0.093104,0.002915,-0.071114,0.019683
LS,-0.022695,0.00391,0.046658,-0.01164



Including MAG hardly changes explanatory power (R? ? 0.29%?0.39%). The LO sleeve loads **?0.07** on SPY and **+0.02** on MAG,
while the LS sleeve shows the opposite sign (?_SPY ? +0.047, ?_MAG ? ?0.012).
Dividend carry therefore remains fundamentally orthogonal to megacap growth trends.


Q2.9
Implement the 5-year rolling sector hedge and report stats plus residual exposures.

In [16]:

hedged_stats


Unnamed: 0,Annualized mean,Annualized vol,Sharpe,Skew,VaR 5%,CVaR 5%,Max drawdown
LO hedged,0.123742,0.126705,0.594922,-0.306931,-0.026042,-0.035662,-0.141046
LS hedged,0.012653,0.095717,-0.374687,0.023796,-0.023481,-0.027034,-0.116184



Hedging sector betas dynamically improves the LO sleeve (mean ? to **12.4%**, vol ? to **12.7%**, max drawdown shrinks to ?14%).
The LS hedged series stabilizes drawdown (?11.6%) but remains low Sharpe (?0.37). Rolling hedges introduce large opposing
sector positions (e.g., LO hedged goes long Utilities +0.23 and short Communication Services ?0.35), meaning the hedged
returns isolate idiosyncratic dividend carry but still inherit residual noise from frequent hedge updates.


Q2.10
Evaluate the +/?0.1% forecasts versus realized returns and hedged residuals (Sections 4.1?4.3).

In [17]:

forecast_df


Unnamed: 0,label,r2,corr,n
0,LO raw,-0.0018,3.768809e-17,37603
1,LS raw,-0.005063,-0.0006735345,75206
2,LO SPY hedged,-0.000134,4.746142e-18,33971
3,LS SPY hedged,0.000531,0.02493711,67942
4,LO sector hedged,-0.000491,-6.451975e-18,22742
5,LS sector hedged,0.00028,0.02281072,45484



Cross-sectional R?s are effectively zero in every configuration. Raw LO/LS forecasts deliver R? of **?0.18%** and **?0.51%**,
meaning the equal-weight 10 bps signal is noise after accounting for next-week returns. Hedging out SPY or all sectors hardly helps:
R? stays within ?0.05% and correlations remain ?0. The implication is that performance stems from portfolio construction
rather than predictive accuracy?dividend yields act as a slow-moving sorting variable, not a sharp return forecast.


Q2.11
Use LASSO to replicate XOM with other energy majors and compare the carry forecasts.

In [18]:

pd.Series(rep_weights, index=rep_peers)


CVX    0.433395
COP    0.040678
PSX    0.114606
SLB    0.086381
EOG    0.161651
dtype: float64


The replication portfolio is heavily long **CVX (0.43)** with supporting exposure to **EOG (0.16)**, **PSX (0.11)**, **SLB (0.09)**,
while **COP** contributes only 0.04. The replication explains **89%** of XOM's excess-return variation.
Whenever the strategy goes long XOM the implied forecast averages **+4.6 bps** more than the replication's forecast
and is **always** higher (100% of overlapping weeks). This systematic gap would violate no-arbitrage if the replication
were perfectly tradeable; in practice it highlights that equal +1% sizing ignores the replication?s net long energy beta.


Q2.12
Extra credit ? outline potential improvements.

In [19]:

improvement_ideas = {
    'vol_scaling': 'Scale weights by signal strength / volatility to stabilize gross exposure.',
    'signal_smoothing': 'Winsorize and average dividend yields to avoid one-off spikes.',
    'sector_neutral_sort': 'Rank within sector buckets before assigning positions.',
    'momentum_filter': 'Exclude recent laggards from longs and leaders from shorts.'
}
improvement_ideas


{'vol_scaling': 'Scale weights by signal strength / volatility to stabilize gross exposure.',
 'signal_smoothing': 'Winsorize and average dividend yields to avoid one-off spikes.',
 'sector_neutral_sort': 'Rank within sector buckets before assigning positions.',
 'momentum_filter': 'Exclude recent laggards from longs and leaders from shorts.'}


1. **Volatility-aware sizing:** Scale weights by signal strength or inverse volatility so the book is closer to dollar neutral
   when fewer than 100 longs/shorts qualify.
2. **Signal smoothing:** Replace the raw dividend yield with a winsorized, 26-week moving average to avoid reacting to one-off
   dividend announcements that momentarily spike the ratio.
3. **Sector-neutral sorts:** Rank securities within each GICS sector before forming longs/shorts to mitigate the structural
   Materials/Staples tilt identified above.
4. **Momentum overlay:** Exclude names in the bottom quintile of trailing 52-week returns from the long leg (and the top
   quintile from the short leg) to avoid catching obvious value traps.
