In [1]:
# Import necessary packages

import polars as pl

In [2]:
''' 
Constraints we have:
- Full investment
- Unit beta
- No buying on margin
- Long only
'''

' \nConstraints we have:\n- Full investment\n- Unit beta\n- No buying on margin\n- Long only\n'

In [3]:
# Read weights

weights = pl.read_parquet('signal_weights/weights/all_weights_pivot.parquet')
r3000_daily = pl.read_parquet('russell_3000_daily.parquet')

In [4]:
r3000_daily

date,barrid,ticker,price,return,specific_return,specific_risk,historical_beta,predicted_beta,market_cap,daily_volume,bid_ask_spread
date,str,str,f64,f64,f64,f64,f64,f64,f64,f64,f64
2013-07-31,"""USA06Z1""","""MDXG""",6.26,-0.1595,-0.788,55.056916,0.328385,0.34349,6.006157e8,121693.0,0.01
2013-08-01,"""USA06Z1""","""MDXG""",6.32,0.9585,0.365,55.028021,0.334989,0.353329,6.0865392e8,131728.0,0.01
2013-08-02,"""USA06Z1""","""MDXG""",6.31,-0.1582,-0.731,54.807402,0.330713,0.363624,6.0769086e8,43252.0,0.01
2013-08-05,"""USA06Z1""","""MDXG""",6.45,2.2187,1.936,54.76671,0.324494,0.356596,6.211737e8,70944.0,0.02
2013-08-06,"""USA06Z1""","""MDXG""",6.29,-2.4806,-0.528,54.692162,0.366323,0.399196,6.0576474e8,77085.0,0.01
…,…,…,…,…,…,…,…,…,…,…,…
2025-09-09,"""USBRKA2""","""GLIBA""",36.225,-0.4261,1.121,20.842679,0.236481,0.302514,1.3226e8,31025.0,0.11
2025-09-10,"""USBRKA2""","""GLIBA""",36.24,0.0414,2.305,20.954701,0.252015,0.29845,1.3231e8,6298.0,0.21
2025-09-11,"""USBRKA2""","""GLIBA""",37.05,2.2351,0.213,21.480703,0.256752,0.31384,1.3527e8,26857.0,0.25
2025-09-12,"""USBRKA2""","""GLIBA""",36.17,-2.3752,-0.733,21.942939,0.257401,0.328961,1.3205e8,8983.0,0.16


In [5]:
weights

date,barrid,bab_weight,meanrev_weight,momentum_weight
date,str,f64,f64,f64
1995-06-30,"""USAA191""",-1.0844e-20,,
1995-06-30,"""USAA1W1""",-9.1201e-22,,
1995-06-30,"""USAA1Y1""",-4.8273e-21,,
1995-06-30,"""USAA2L1""",-4.2673e-21,,
1995-06-30,"""USAA311""",-9.2198e-21,,
…,…,…,…,…
2025-09-15,"""USBQZR1""",-6.9890e-22,-6.1406e-20,
2025-09-15,"""USBRGT1""",-3.7165e-21,-6.0722e-19,
2025-09-15,"""USBRH21""",-2.6956e-21,-2.1598e-19,
2025-09-15,"""USBRKA1""",-5.5406e-21,-8.6297e-19,


In [19]:
mergedWt = r3000_daily.join(
    weights,
    on=["date", "barrid"],
    how="inner"
)

df = mergedWt

tol = 1e-6

In [20]:
# --- 1. Full investment ---
# For each signal, the daily weights across all securities should sum to 1
full_investment = (
    df.group_by("date")
      .agg([
          # Daily sum of weights (ignores nulls)
          pl.sum("bab_weight").alias("bab_sum"),
          pl.sum("meanrev_weight").alias("meanrev_sum"),
          pl.sum("momentum_weight").alias("momentum_sum"),

          # Count how many non-null weights exist for that signal/day
          # (used to detect inactive signals where all weights are null)
          pl.count("bab_weight").alias("bab_count"),
          pl.count("meanrev_weight").alias("meanrev_count"),
          pl.count("momentum_weight").alias("momentum_count"),
      ])
      .with_columns([
          # Check constraint: sum ≈ 1 (within tolerance)
          # If no weights (count = 0), return None (skip that day)
          pl.when(pl.col("bab_count") > 0)
            .then(((pl.col("bab_sum") - 1).abs() < tol))
            .otherwise(None).alias("bab_full_investment_ok"),

          pl.when(pl.col("meanrev_count") > 0)
            .then(((pl.col("meanrev_sum") - 1).abs() < tol))
            .otherwise(None).alias("meanrev_full_investment_ok"),

          pl.when(pl.col("momentum_count") > 0)
            .then(((pl.col("momentum_sum") - 1).abs() < tol))
            .otherwise(None).alias("momentum_full_investment_ok"),
      ])
      # Drop intermediate counts; only keep sums + booleans
      .drop(["bab_count", "meanrev_count", "momentum_count"]) 
)

# Collapse across all dates:
# .all() checks if every row is True (ignores nulls unless all null)
# .row(0) extracts as a tuple (bab_ok, meanrev_ok, momentum_ok)
full_investment_ok = (
    full_investment
    .select([
        pl.col("bab_full_investment_ok").all(),
        pl.col("meanrev_full_investment_ok").all(),
        pl.col("momentum_full_investment_ok").all()
    ])
    .row(0)
)

full_investment_ok

(True, True, True)

In [21]:
# --- 2. Unit beta ---
# For each signal, the sum of (weight × predicted_beta) per day should equal 1
unit_beta = (
    df.with_columns([
        # Calculate daily weighted betas per signal
        (df["bab_weight"] * df["predicted_beta"]).alias("bab_beta"),
        (df["meanrev_weight"] * df["predicted_beta"]).alias("meanrev_beta"),
        (df["momentum_weight"] * df["predicted_beta"]).alias("momentum_beta"),
    ])
    .group_by("date")
    .agg([
        # Aggregate by day: sum of weighted betas
        pl.sum("bab_beta").alias("bab_beta_sum"),
        pl.sum("meanrev_beta").alias("meanrev_beta_sum"),
        pl.sum("momentum_beta").alias("momentum_beta_sum"),

        # Also keep weight sums for the OR condition
        pl.sum("bab_weight").alias("bab_weight_sum"),
        pl.sum("meanrev_weight").alias("meanrev_weight_sum"),
        pl.sum("momentum_weight").alias("momentum_weight_sum"),
    ])
    .with_columns([
        pl.when(pl.col("bab_beta_sum").is_not_null())
          .then(
              ((pl.col("bab_beta_sum") - 1).abs() < tol) |
              (pl.col("bab_weight_sum") == 0)
          )
          .otherwise(None).alias("bab_unit_beta_ok"),

        pl.when(pl.col("meanrev_beta_sum").is_not_null())
          .then(
              ((pl.col("meanrev_beta_sum") - 1).abs() < tol) |
              (pl.col("meanrev_weight_sum") == 0)
          )
          .otherwise(None).alias("meanrev_unit_beta_ok"),

        pl.when(pl.col("momentum_beta_sum").is_not_null())
          .then(
              ((pl.col("momentum_beta_sum") - 1).abs() < tol) |
              (pl.col("momentum_weight_sum") == 0)
          )
          .otherwise(None).alias("momentum_unit_beta_ok"),
    ])
)

unit_beta_ok = (
    unit_beta
    .select([
        pl.col("bab_unit_beta_ok").all(),
        pl.col("meanrev_unit_beta_ok").all(),
        pl.col("momentum_unit_beta_ok").all()
    ])
    .row(0)
)

unit_beta_ok

(True, True, True)

In [14]:
failed_unit_beta = (
    unit_beta
    .filter(
        (~pl.col("bab_unit_beta_ok")) |
        (~pl.col("meanrev_unit_beta_ok")) |
        (~pl.col("momentum_unit_beta_ok"))
    )
)

failed_unit_beta.head(50)

date,bab_beta_sum,meanrev_beta_sum,momentum_beta_sum,bab_unit_beta_ok,meanrev_unit_beta_ok,momentum_unit_beta_ok
date,f64,f64,f64,bool,bool,bool
1996-03-14,1.0,1.0,0.0,true,true,false
1995-11-07,1.0,1.0,0.0,true,true,false
1995-11-13,1.0,1.0,0.0,true,true,false
1995-07-05,1.0,0.0,0.0,true,false,false
1996-03-20,1.0,1.0,0.0,true,true,false
…,…,…,…,…,…,…
1995-08-31,1.0,1.0,0.0,true,true,false
1996-01-12,1.0,1.0,0.0,true,true,false
1996-05-22,1.0,1.0,0.0,true,true,false
1995-09-06,1.0,1.0,0.0,true,true,false


In [29]:
# --- 3. Long only ---
# For each signal, all weights should be >= 0 (no shorts allowed)
min_weights = (
    df.group_by("date")
      .agg([
          # Find the minimum weight per signal per day
          pl.min("bab_weight").alias("bab_min"),
          pl.min("meanrev_weight").alias("meanrev_min"),
          pl.min("momentum_weight").alias("momentum_min"),
      ])
      .with_columns([
          # Check constraint: min >= 0
          # If all weights are null → result = None
          pl.when(pl.col("bab_min").is_not_null())
            .then(pl.col("bab_min") >= -tol)
            .otherwise(None).alias("bab_long_only_ok"),

          pl.when(pl.col("meanrev_min").is_not_null())
            .then(pl.col("meanrev_min") >= -tol)
            .otherwise(None).alias("meanrev_long_only_ok"),

          pl.when(pl.col("momentum_min").is_not_null())
            .then(pl.col("momentum_min") >= -tol)
            .otherwise(None).alias("momentum_long_only_ok"),
      ])
)

long_only_ok = (
    min_weights
    .select([
        pl.col("bab_long_only_ok").all(),
        pl.col("meanrev_long_only_ok").all(),
        pl.col("momentum_long_only_ok").all()
    ])
    .row(0)
)

long_only_ok

(False, False, False)

In [35]:
failed_long_only = (
    min_weights
    .filter(
        (~pl.col("bab_long_only_ok")) |
        (~pl.col("meanrev_long_only_ok")) |
        (~pl.col("momentum_long_only_ok"))
    )
)

failed_long_only


date,bab_min,meanrev_min,momentum_min,bab_long_only_ok,meanrev_long_only_ok,momentum_long_only_ok
date,f64,f64,f64,bool,bool,bool
2003-01-27,-0.000002,-1.0827e-19,-3.2482e-20,false,true,true
2014-07-11,-0.000002,-2.0183e-7,-3.0368e-7,false,true,true
2015-08-20,-0.000004,-2.6170e-7,-4.2220e-7,false,true,true
2017-05-24,-0.000001,-2.3660e-7,-3.5092e-7,false,true,true
1996-12-04,-0.000002,-5.9493e-17,-3.6315e-7,false,true,true
…,…,…,…,…,…,…
2014-02-21,-0.000003,-1.5094e-7,-2.3550e-7,false,true,true
1996-03-08,-0.000002,-1.6845e-17,,false,true,
2019-07-01,-0.000002,-2.2868e-7,-6.4427e-7,false,true,true
2001-03-16,-0.000001,-3.4546e-7,-0.000002,false,true,false


In [37]:
# --- 4. No Margin ---
# Gross exposure (sum of absolute weights) must be <= 1
gross_weights = (
    df.with_columns([
        df["bab_weight"].abs().alias("bab_abs"),
        df["meanrev_weight"].abs().alias("meanrev_abs"),
        df["momentum_weight"].abs().alias("momentum_abs"),
    ])
    .group_by("date")
    .agg([
        pl.sum("bab_abs").alias("bab_gross"),
        pl.sum("meanrev_abs").alias("meanrev_gross"),
        pl.sum("momentum_abs").alias("momentum_gross"),
    ])
    .with_columns([
        pl.when(pl.col("bab_gross").is_not_null())
          .then(pl.col("bab_gross") <= 1 + tol)
          .otherwise(None).alias("bab_no_margin_ok"),

        pl.when(pl.col("meanrev_gross").is_not_null())
          .then(pl.col("meanrev_gross") <= 1 + tol)
          .otherwise(None).alias("meanrev_no_margin_ok"),

        pl.when(pl.col("momentum_gross").is_not_null())
          .then(pl.col("momentum_gross") <= 1 + tol)
          .otherwise(None).alias("momentum_no_margin_ok"),
    ])
)

no_margin_ok = (
    gross_weights
    .select([
        pl.col("bab_no_margin_ok").all(),
        pl.col("meanrev_no_margin_ok").all(),
        pl.col("momentum_no_margin_ok").all()
    ])
    .row(0)
)

no_margin_ok

(False, False, False)

In [39]:
failed_no_margin = (
    gross_weights
    .filter(
        (pl.col("bab_no_margin_ok") == False) |
        (pl.col("meanrev_no_margin_ok") == False) |
        (pl.col("momentum_no_margin_ok") == False)
    )
)

failed_no_margin

date,bab_gross,meanrev_gross,momentum_gross,bab_no_margin_ok,meanrev_no_margin_ok,momentum_no_margin_ok
date,f64,f64,f64,bool,bool,bool
2007-09-26,1.0,1.000522,1.000809,true,false,false
2014-11-28,1.0,1.00043,1.000551,true,false,false
2015-03-24,1.0,1.000592,1.000938,true,false,false
2000-07-06,1.001493,1.0,1.0,false,true,true
2018-06-21,1.0,1.000691,1.000894,true,false,false
…,…,…,…,…,…,…
2023-06-05,1.0025,1.0,1.0,false,true,true
2001-03-01,1.0,1.000473,1.0,true,false,true
2011-03-29,1.0,1.00039,1.000543,true,false,false
2008-05-06,1.002195,1.000459,1.000848,false,false,false
