In [1]:
import sf_quant.data as sfd
import sf_quant.optimizer as sfo
import sf_quant.backtester as sfb
import sf_quant.performance as sfp
import polars as pl
import datetime as dt
import tqdm

  from .autonotebook import tqdm as notebook_tqdm
2025-10-08 16:35:49,477	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


In [4]:
# Load the stored weights table (columns: date, barrid, weight) into `bl`.
# NOTE(review): "bl" presumably stands for Black-Litterman — confirm.
bl = pl.read_parquet('bl_weights/bl_weights_pivot.parquet')

In [5]:
# Render the weights frame as the cell output (polars truncates the middle rows).
bl

date,barrid,weight
date,str,f64
1996-07-16,"""USAA191""",-2.3810e-7
1996-07-16,"""USAA1Y1""",0.000382
1996-07-16,"""USAA251""",-7.3626e-8
1996-07-16,"""USAA311""",0.000317
1996-07-16,"""USAA3I1""",-1.4242e-7
…,…,…
2024-12-31,"""USBQNC1""",8.8906e-8
2024-12-31,"""USBQND1""",1.4242e-8
2024-12-31,"""USBQO11""",0.000009
2024-12-31,"""USBQOE1""",0.000049


In [3]:
# Load the raw daily panel eagerly. The signal cell below expects it to carry
# at least `barrid`, `date`, `return`, `specific_return`, `specific_risk`
# and `predicted_beta`.
df = pl.read_parquet('../russell_3000_daily.parquet')

In [None]:
IC = 0.05  # scaling constant in alpha = IC * specific_risk * z-score
RAW_PATH = '../russell_3000_daily.parquet'
_SIGNALS = ["momentum", "meanrev", "bab"]


def _zscore_by_date(name):
    """Return the z-score of column `name` taken across all rows sharing a date.

    The result is aliased to ``f"{name}_z"``.
    """
    col = pl.col(name)
    return ((col - col.mean().over("date")) / col.std().over("date")).alias(f"{name}_z")


def _alpha(name):
    """Return ``IC * specific_risk * <name>_z`` aliased to ``f"{name}_alpha"``."""
    return (IC * pl.col("specific_risk") * pl.col(f"{name}_z")).alias(f"{name}_alpha")


# Build from the raw parquet instead of from the in-memory `df`: the percent
# columns are divided by 100 below, so chaining off `df` would rescale them
# again every time this cell is re-run.
df = (
    pl.scan_parquet(RAW_PATH)
    .sort(["barrid", "date"])
    .with_columns([  # Convert percents to fractions
        pl.col('specific_risk').truediv(100),
        pl.col('return').truediv(100),
        pl.col('specific_return').truediv(100),
    ])
    .with_columns(
        pl.col('return').log1p().alias('log_return')
    )
    .with_columns([
        # Sum of the trailing 230 log returns, lagged 22 rows, per barrid.
        pl.col("log_return")
            .rolling_sum(230)
            .shift(22)
            .over("barrid")
            .alias("momentum"),
        # Negated sum of the trailing 22 log returns, per barrid.
        (-(pl.col("log_return").rolling_sum(22).over("barrid"))).alias("meanrev"),
        # Negated predicted beta.
        (-pl.col("predicted_beta")).alias("bab"),
    ])
    .with_columns([_zscore_by_date(name) for name in _SIGNALS])  # *_z columns
    .with_columns([_alpha(name) for name in _SIGNALS])  # *_alpha columns
    .collect()
)

In [None]:
# Shape before filtering.
print(df.shape)

# Keep rows with price above 5 that have at least one non-null raw signal.
# NOTE(review): the null checks are OR-ed, so a surviving row can still have
# null `momentum` and/or `meanrev` (and the derived z/alpha columns) — confirm
# that this, rather than requiring all three, is intended.
df = (
    df.filter(
        pl.col('price').gt(5),
        (
            pl.col('momentum').is_not_null()
            | pl.col('meanrev').is_not_null()
            | pl.col('bab').is_not_null()
        ),
    )
    # filter() keeps row order, so one sort on the reduced frame is enough;
    # sorting the unfiltered frame first was redundant work.
    .sort('barrid', 'date')
)

# Shape after filtering.
print(df.shape)


(22363472, 22)
(20764025, 22)


In [7]:
# First and last distinct dates in `df`.
# Sorting must happen AFTER unique(): unique() does not promise to keep the
# input order by default, so indexing [0] / [-1] on sort-then-unique output is
# not guaranteed to yield the min / max. Computed once instead of twice.
unique_dates = df.select(pl.col('date').unique().sort()).to_numpy()
print(unique_dates[0], unique_dates[-1])

['1995-06-30'] ['2025-09-15']


In [8]:
# Persist the filtered signal frame; later cells reload it from this file.
df.write_parquet('signal_data.parquet')

In [9]:
# Render the signal frame as the cell output (polars truncates the middle rows).
df

date,barrid,ticker,price,return,specific_return,specific_risk,historical_beta,predicted_beta,market_cap,daily_volume,bid_ask_spread,log_return,momentum,meanrev,bab,momentum_z,meanrev_z,bab_z,momentum_alpha,meanrev_alpha,bab_alpha
date,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2013-07-31,"""USA06Z1""","""MDXG""",6.26,-0.001595,-0.00788,0.550569,0.328385,0.34349,6.006157e8,121693.0,0.01,-0.001596,,,-0.34349,,,2.149267,,,0.059166
2013-08-01,"""USA06Z1""","""MDXG""",6.32,0.009585,0.00365,0.55028,0.334989,0.353329,6.0865392e8,131728.0,0.01,0.009539,,,-0.353329,,,2.111728,,,0.058102
2013-08-02,"""USA06Z1""","""MDXG""",6.31,-0.001582,-0.00731,0.548074,0.330713,0.363624,6.0769086e8,43252.0,0.01,-0.001583,,,-0.363624,,,2.086144,,,0.057168
2013-08-05,"""USA06Z1""","""MDXG""",6.45,0.022187,0.01936,0.547667,0.324494,0.356596,6.211737e8,70944.0,0.02,0.021944,,,-0.356596,,,2.098288,,,0.057458
2013-08-06,"""USA06Z1""","""MDXG""",6.29,-0.024806,-0.00528,0.546922,0.366323,0.399196,6.0576474e8,77085.0,0.01,-0.025119,,,-0.399196,,,2.001238,,,0.054726
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
2025-09-09,"""USBRKA2""","""GLIBA""",36.225,-0.004261,0.01121,0.208427,0.236481,0.302514,1.3226e8,31025.0,0.11,-0.00427,,-0.007341,-0.302514,,0.37369,2.013268,,0.003894,0.020981
2025-09-10,"""USBRKA2""","""GLIBA""",36.24,0.000414,0.02305,0.209547,0.252015,0.29845,1.3231e8,6298.0,0.21,0.000414,,-0.020066,-0.29845,,0.247624,2.007619,,0.002594,0.021035
2025-09-11,"""USBRKA2""","""GLIBA""",37.05,0.022351,0.00213,0.214807,0.256752,0.31384,1.3527e8,26857.0,0.25,0.022105,,-0.065239,-0.31384,,0.07841,1.987568,,0.000842,0.021347
2025-09-12,"""USBRKA2""","""GLIBA""",36.17,-0.023752,-0.00733,0.219429,0.257401,0.328961,1.3205e8,8983.0,0.16,-0.024039,,-0.043797,-0.328961,,-0.046662,1.958489,,-0.000512,0.021488


In [8]:
# Load one saved weights file into `one_year_weights`.
# NOTE(review): the file name suggests momentum weights covering 2023-06-27
# through 2024-06-26 — confirm against whatever wrote it.
one_year_weights = pl.read_parquet('weights/momentum_weights_2023-06-27_2024-06-26.parquet')

In [9]:
# Total of the `weight` column over every row in the file (all dates pooled,
# not a per-date sum).
print(one_year_weights['weight'].sum())

251.99999998255518


In [3]:
# Eager variant: read the whole file, then narrow to one barrid and three columns.
# Note that this rebinds `df` to the narrowed frame.
df = (
    pl.read_parquet("signal_data.parquet")
    .filter(pl.col('barrid') == "USA06Z1")
    .select('date', 'barrid', 'price')
)

df

date,barrid,price
date,str,f64
2013-07-31,"""USA06Z1""",6.26
2013-08-01,"""USA06Z1""",6.32
2013-08-02,"""USA06Z1""",6.31
2013-08-05,"""USA06Z1""",6.45
2013-08-06,"""USA06Z1""",6.29
…,…,…
2025-09-09,"""USA06Z1""",7.12
2025-09-10,"""USA06Z1""",6.89
2025-09-11,"""USA06Z1""",7.14
2025-09-12,"""USA06Z1""",6.94


In [5]:
# Lazy variant: build the scan -> filter -> select plan, then materialise it.
# Note that this rebinds `df` to the narrowed frame.
df = (
    pl.scan_parquet("signal_data.parquet")
    .filter(pl.col('barrid') == "USA06Z1")
    .select('date', 'barrid', 'price')
    .collect()
)

df

date,barrid,price
date,str,f64
2013-07-31,"""USA06Z1""",6.26
2013-08-01,"""USA06Z1""",6.32
2013-08-02,"""USA06Z1""",6.31
2013-08-05,"""USA06Z1""",6.45
2013-08-06,"""USA06Z1""",6.29
…,…,…
2025-09-09,"""USA06Z1""",7.12
2025-09-10,"""USA06Z1""",6.89
2025-09-11,"""USA06Z1""",7.14
2025-09-12,"""USA06Z1""",6.94
