In [None]:
import sf_quant.data as sfd
import sf_quant.optimizer as sfo
import sf_quant.backtester as sfb
import sf_quant.performance as sfp
import polars as pl
import datetime as dt
import tqdm

  from .autonotebook import tqdm as notebook_tqdm
2025-09-27 10:53:51,704	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


In [3]:
df = pl.read_parquet('../russell_3000_daily.parquet')

In [5]:
IC = 0.05

df = (
    df.lazy()
    .sort(["barrid", "date"])
    .with_columns([ # Convert nasty percents to nice fractions
        pl.col('specific_risk').truediv(100),
        pl.col('return').truediv(100),
        pl.col('specific_return').truediv(100)
    ])
    .with_columns(
        pl.col('return').log1p().alias('log_return')
    )
    .with_columns(
        pl.col("log_return")
            .rolling_sum(230)
            .over("barrid")
            .alias("momentum_temp")
    )
    .with_columns(
        pl.col("momentum_temp").shift(22).over("barrid").alias("momentum")
    )
    .with_columns(
        pl.col("log_return")
            .rolling_sum(22)
            .over("barrid")
            .alias("meanrev_temp")
    )
    .with_columns(
        (-pl.col("meanrev_temp").shift(1).over("barrid")).alias("meanrev")
    )
    .with_columns(
        (-pl.col("predicted_beta")).alias("bab")
    )
    .with_columns([ # Add signal z-scores
        ((pl.col("momentum") - pl.col("momentum").mean().over("date")) 
     / pl.col("momentum").std().over("date")).alias("momentum_z"),
        ((pl.col("meanrev") - pl.col("meanrev").mean().over("date")) 
     / pl.col("meanrev").std().over("date")).alias("meanrev_z"),
        ((pl.col("bab") - pl.col("bab").mean().over("date")) 
     / pl.col("bab").std().over("date")).alias("bab_z")
    ])
    .with_columns([ # Add signal alphas, using alpha = IC * specific_risk * z-score
        (IC * pl.col("specific_risk") * pl.col("momentum_z")).alias("momentum_alpha"),
        (IC * pl.col("specific_risk") * pl.col("meanrev_z")).alias("meanrev_alpha"),
        (IC * pl.col("specific_risk") * pl.col("bab_z")).alias("bab_alpha")
    ])
    .drop(["momentum_temp", "meanrev_temp"])
    .collect()
)

In [6]:
# Need to add filter here to drop nulls, low prices, etc.
print(df.shape)

df = (df.sort('barrid', 'date')
        .with_columns(
            pl.col('price').shift(1).over('barrid').alias('price_lag')
        )
        .filter(
            pl.col('price').gt(5),
            pl.col('momentum').is_not_null(),
            pl.col('meanrev').is_not_null(),
            pl.col('bab').is_not_null()
        )
        .sort('barrid', 'date')
        .drop('price_lag')
    )

print(df.shape)


(22363472, 22)
(18411839, 22)


In [7]:
df.write_parquet('signal_data.parquet')

In [None]:
constraints = [
    sfo.FullInvestment(),
    sfo.LongOnly(),
    sfo.NoBuyingOnMargin(),
    sfo.UnitBeta()
]

momentum_weights = sfb.backtest_parallel(df.rename({'momentum_alpha': 'alpha'}), constraints, 2)

2025-09-27 10:56:02,358	INFO worker.py:1951 -- Started a local Ray instance.
