We can either get a q matrix by:
- a ML approach to predict expected returns for our alphas, or
- w_s * alpha_s

In [1]:
import sf_quant.data as sfd
import polars as pl
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm
2025-10-01 13:26:09,094	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


In [2]:
signal_weights = pl.read_parquet("../../signal_weights.parquet")

signal_weights

date,barrid,bab_weight,meanrev_weight,momentum_weight
date,str,f64,f64,f64
1995-06-30,"""USAA191""",-1.0844e-20,,
1995-06-30,"""USAA1W1""",-9.1201e-22,,
1995-06-30,"""USAA1Y1""",-4.8273e-21,,
1995-06-30,"""USAA2L1""",-4.2673e-21,,
1995-06-30,"""USAA311""",-9.2198e-21,,
…,…,…,…,…
2025-09-15,"""USBQZR1""",-6.9890e-22,-1.3553e-20,
2025-09-15,"""USBRGT1""",-3.7165e-21,-3.4045e-19,
2025-09-15,"""USBRH21""",-2.6956e-21,-9.9263e-20,
2025-09-15,"""USBRKA1""",-5.5406e-21,-4.3920e-19,


In [3]:
bab_alpha = pl.read_parquet("/home/jaic007/silverFund/combineSignal/signalCalc/signalAlphas/bab_alpha.parquet")

meanrev_alpha = pl.read_parquet("/home/jaic007/silverFund/combineSignal/signalCalc/signalAlphas/meanrev_alpha.parquet")

mom_alpha = pl.read_parquet("/home/jaic007/silverFund/combineSignal/signalCalc/signalAlphas/mom_alpha.parquet")

bab_alpha_renamed = bab_alpha.select(['date', 'barrid', 'alpha']).rename({'alpha': 'bab_alpha'})
mom_alpha_renamed = mom_alpha.select(['date', 'barrid', 'alpha']).rename({'alpha': 'momentum_alpha'})
meanrev_alpha_renamed = meanrev_alpha.select(['date', 'barrid', 'alpha']).rename({'alpha': 'meanrev_alpha'})

# Perform a full outer join with coalesce to handle the duplicate key columns
combined_alpha = (
    bab_alpha_renamed
    .join(mom_alpha_renamed, on=['date', 'barrid'], how='full', coalesce=True)
    .join(meanrev_alpha_renamed, on=['date', 'barrid'], how='full', coalesce=True)
)

# Sort by date and barrid for cleaner output
combined_alpha = combined_alpha.sort(['date', 'barrid'])

In [4]:
combined_alpha

date,barrid,bab_alpha,momentum_alpha,meanrev_alpha
date,str,f64,f64,f64
1995-07-03,"""USAA191""",-0.003302,,
1995-07-03,"""USAA1W1""",0.032054,,
1995-07-03,"""USAA1Y1""",-0.014456,,
1995-07-03,"""USAA2L1""",0.017932,,
1995-07-03,"""USAA311""",0.003537,,
…,…,…,…,…
2024-12-31,"""USBQFF1""",0.021174,,-0.019814
2024-12-31,"""USBQGD1""",0.019763,,0.001263
2024-12-31,"""USBQLB1""",0.022202,,-0.00764
2024-12-31,"""USBQLU1""",0.00477,,-0.004014


In [5]:
# Join the combined alphas with signal weights
qMatrix = combined_alpha.join(
    signal_weights,
    on=['date', 'barrid'],
    how='left'  # or 'inner' if you only want matching rows
)

# Multiply each alpha by its respective weight to get signal expected returns
qMatrix = qMatrix.with_columns([
    (pl.col('bab_alpha') * pl.col('bab_weight')).alias('bab_er'),
    (pl.col('momentum_alpha') * pl.col('momentum_weight')).alias('momentum_er'),
    (pl.col('meanrev_alpha') * pl.col('meanrev_weight')).alias('meanrev_er')
])

# If you want to keep only the essential columns:
qMatrix = qMatrix.select([
    'date',
    'barrid',
    'bab_er',
    'momentum_er',
    'meanrev_er'
])

In [7]:
qMatrix

date,barrid,bab_er,momentum_er,meanrev_er
date,str,f64,f64,f64
1995-07-03,"""USAA191""",2.6152e-23,,
1995-07-03,"""USAA1W1""",-1.8986e-23,,
1995-07-03,"""USAA1Y1""",7.7330e-23,,
1995-07-03,"""USAA2L1""",-5.9099e-23,,
1995-07-03,"""USAA311""",-2.4240e-23,,
…,…,…,…,…
2024-12-31,"""USBQFF1""",0.000035,,1.4599e-9
2024-12-31,"""USBQGD1""",-1.3423e-22,,-5.0275e-11
2024-12-31,"""USBQLB1""",-1.2665e-22,,3.8366e-10
2024-12-31,"""USBQLU1""",-3.9287e-23,,4.5593e-10


In [9]:
signal_er = (
    qMatrix.group_by("date")
     .agg([
         pl.col("bab_er").sum().alias("bab_er"),
         pl.col("momentum_er").sum().alias("momentum_er"),
         pl.col("meanrev_er").sum().alias("meanrev_er")
     ])
     .sort("date")
)

signal_er

date,bab_er,momentum_er,meanrev_er
date,f64,f64,f64
1995-07-03,-0.002372,0.0,0.0
1995-07-05,-0.002456,0.0,0.0
1995-07-06,-0.002549,0.0,0.0
1995-07-07,-0.002443,0.0,0.0
1995-07-10,-0.002684,0.0,0.0
…,…,…,…
2024-12-24,0.003805,0.030739,0.045066
2024-12-26,0.003904,0.03071,0.042249
2024-12-27,0.003647,0.02877,0.043792
2024-12-30,0.003365,0.028493,0.041962


In [14]:
signal_bab = pl.read_parquet("/home/jaic007/silverFund/combineSignal/signalCalc/signalReturns/signal_bab.parquet")
signal_meanrev = pl.read_parquet("/home/jaic007/silverFund/combineSignal/signalCalc/signalReturns/signal_meanrev.parquet")
signal_momentum = pl.read_parquet("/home/jaic007/silverFund/combineSignal/signalCalc/signalReturns/signal_momentum.parquet")

signals_daily = (
    signal_bab
    .join(signal_meanrev, on="date", how="full", suffix="_meanrev")
    .join(signal_momentum, on="date", how="full", suffix="_momentum")
    .sort("date")
    .select(['date', 'bab_return', 'meanrev_return', 'momentum_return'])
)


signals_daily

date,bab_return,meanrev_return,momentum_return
date,f64,f64,f64
1995-06-30,0.003391,0.0,0.0
1995-07-03,0.006842,0.0,0.0
1995-07-05,0.015751,0.0,0.0
1995-07-06,0.011595,0.0,0.0
1995-07-07,0.007155,0.0,0.0
…,…,…,…
2025-09-08,0.008037,0.003606,0.018773
2025-09-09,-0.001619,0.000151,-0.013817
2025-09-10,0.014106,0.036719,0.021146
2025-09-11,-0.002669,-0.004925,0.012381


In [15]:
comparison = (
    signal_er.join(signals_daily, on="date", how="inner")
            .sort("date")
)

comparison

date,bab_er,momentum_er,meanrev_er,bab_return,meanrev_return,momentum_return
date,f64,f64,f64,f64,f64,f64
1995-07-03,-0.002372,0.0,0.0,0.006842,0.0,0.0
1995-07-05,-0.002456,0.0,0.0,0.015751,0.0,0.0
1995-07-06,-0.002549,0.0,0.0,0.011595,0.0,0.0
1995-07-07,-0.002443,0.0,0.0,0.007155,0.0,0.0
1995-07-10,-0.002684,0.0,0.0,-0.001244,0.0,0.0
…,…,…,…,…,…,…
2024-12-24,0.003805,0.030739,0.045066,0.001975,0.01208,0.012256
2024-12-26,0.003904,0.03071,0.042249,-0.009431,-0.015744,-0.024086
2024-12-27,0.003647,0.02877,0.043792,-0.008503,-0.021729,-0.022794
2024-12-30,0.003365,0.028493,0.041962,-0.001555,0.007854,-0.008189


In [17]:
corrs = comparison.select([
    pl.corr("bab_er", "bab_return").alias("bab_corr"),
    pl.corr("momentum_er", "momentum_return").alias("momentum_corr"),
    pl.corr("meanrev_er", "meanrev_return").alias("meanrev_corr")
])

diffs = comparison.select([
    (pl.col("bab_er") - pl.col("bab_return")).mean().alias("bab_diff"),
    (pl.col("momentum_er") - pl.col("momentum_return")).mean().alias("momentum_diff"),
    (pl.col("meanrev_er") - pl.col("meanrev_return")).mean().alias("meanrev_diff")
])

diffs

bab_diff,momentum_diff,meanrev_diff
f64,f64,f64
0.002913,0.033824,0.057651
