In [148]:
import polars as pl
import datetime as dt
import numpy as np

In [149]:
def compute_weights(covariance, expected_returns): #This works
    w = np.linalg.solve(covariance, expected_returns)
    return w/np.sum(w)

def weights_error(cov_est, cov_real, expected_returns): #This definitely doesn't work
    return np.linalg.norm(compute_weights(cov_est, expected_returns) - compute_weights(cov_real, expected_returns))


In [150]:
start_date = dt.date(1996,7,3) #min value 1996,7,3
end_date = dt.date(2024, 12, 31) #max value 2024, 12, 31

bab = pl.read_parquet("signal_bab.parquet") #1
meanrev = pl.read_parquet("signal_meanrev.parquet") #2
momentum = pl.read_parquet("signal_momentum.parquet") #3

expected_returns_data = (
    bab.join(meanrev, on="date", how="inner")
       .join(momentum, on="date", how="inner")
).select(
    pl.col("date").alias("date"),
    pl.col("bab_return").rolling_mean(45).mul(45/46).alias("expected_returns1"),
    pl.col("meanrev_return").rolling_mean(45).mul(45/46).alias("expected_returns2"),
    pl.col("momentum_return").rolling_mean(45).mul(45/46).alias("expected_returns3")
).filter(pl.col("date").is_between(start_date, end_date))

weekday = list(map(lambda date : date.weekday(), bab.filter(pl.col("date").is_between(start_date, end_date)).select(pl.col("date").alias("date")).to_series().to_list())) 

returns1 = bab.filter(pl.col("date").is_between(start_date, end_date)).select(pl.col("bab_return").alias("return1")).to_numpy()
returns2 = meanrev.filter(pl.col("date").is_between(start_date, end_date)).select(pl.col("meanrev_return").alias("return2")).to_numpy()
returns3 = momentum.filter(pl.col("date").is_between(start_date, end_date)).select(pl.col("momentum_return").alias("return3")).to_numpy()

len = returns1.size

returns = np.column_stack((np.ravel(returns1), 
                           np.ravel(returns2), 
                           np.ravel(returns3)))

expected_returns = expected_returns_data.select(
    pl.col("expected_returns1").alias("expected_returns1"),
    pl.col("expected_returns2").alias("expected_returns2"),
    pl.col("expected_returns3").alias("expected_returns3")
).to_numpy()

dif_returns = expected_returns - returns

outer_product = [(np.outer((dif_returns[i]), (dif_returns[i]))) for i in range(0, len)] 
#I had to rename this variable because it didn't mean what I thought it meant.
#I don't know what to call this so I've named it outer product.

def recurse(x_new, x_prev, day, r) : #Converges to the geometrically weighted mean.
    if (day == 0): return (1-r)*x_new + r*x_prev #If the day is monday, we use an alternative formula because weekends are non-trading days.
    else: return (1-r)*x_new + r*x_prev

r = 0.96 #After 120 days, the contribution of lower terms should be negligable. I just chose r so that r^120 < 0.01

#Remember, the estimated covariance matrix is the expected covariance matrix for the next day.
cov_matrix_est = [outer_product[0]]
for i in range(1, len-1):
    cov_matrix_est.append(recurse(outer_product[i], cov_matrix_est[i-1], weekday[i], r))

#This is the estimated covariance matrix for each day
#Make sure to estimate the covariance matrix for atleast 120 days. 
#This will insure that the junk starting value will have a negligible effect on the estimated covariance matrix.

print(cov_matrix_est)

#Use the compute_weights function to get the signal weights
#1 -> bab
#2 -> meanrev
#3 -> momentum

[array([[0.00036762, 0.00044861, 0.0001949 ],
       [0.00044861, 0.00054745, 0.00023784],
       [0.0001949 , 0.00023784, 0.00010333]]), array([[0.00036372, 0.00046594, 0.00019423],
       [0.00046594, 0.00064076, 0.00025159],
       [0.00019423, 0.00025159, 0.0001039 ]]), array([[3.49637942e-04, 4.43409159e-04, 1.86569098e-04],
       [4.43409159e-04, 6.47350367e-04, 2.40625638e-04],
       [1.86569098e-04, 2.40625638e-04, 9.97657312e-05]]), array([[3.35917635e-04, 4.24294722e-04, 1.78408776e-04],
       [4.24294722e-04, 6.28616982e-04, 2.34625211e-04],
       [1.78408776e-04, 2.34625211e-04, 9.76098175e-05]]), array([[0.00034514, 0.00042461, 0.00020011],
       [0.00042461, 0.00061666, 0.00024724],
       [0.00020011, 0.00024724, 0.00013041]]), array([[0.00033235, 0.0004099 , 0.0001945 ],
       [0.0004099 , 0.00059706, 0.00024269],
       [0.0001945 , 0.00024269, 0.00013083]]), array([[0.0003427 , 0.00042614, 0.00022283],
       [0.00042614, 0.00061824, 0.00028284],
       [0.00022