In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd

In [2]:
# Load the returns table from disk and preview its first rows.
path = "data/returns.csv"
returns = pd.read_csv(
    path,
    index_col=0,       # first CSV column becomes the row index
    parse_dates=True,  # ask pandas to parse that index as dates
)
returns.head()

Unnamed: 0_level_0,0,A,AA,AAL,AAP,AAPL,AAS,ABBV,ABC,ABMD,...,XOM,XRAY,XYL,YUM,ZBH,ZBRA,ZION,ZMH,ZMX,ZTS
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2001-01-02,0.0,-0.070776,-0.037313,0.0,0.0,0.0,-0.011139,0.0,0.0,-0.072165,...,0.025162,-0.003195,0.0,-0.032197,0.0,0.038682,-0.04004,0.0,0.0,0.0
2001-01-03,0.0,0.103194,0.013566,0.0,0.0,0.10084,0.020025,0.0,0.0,0.094444,...,-0.043478,-0.051282,0.0,0.056751,0.0,0.10767,0.033368,0.0,0.0,0.0
2001-01-04,0.0,0.037862,0.032505,0.0,0.0,0.041985,-0.082209,0.0,0.0,-0.081218,...,-0.027859,-0.064189,0.0,0.001852,0.0,-0.003995,-0.020182,0.0,-0.011628,0.0
2001-01-05,0.0,-0.054721,-0.018519,0.0,0.0,-0.040293,-0.034759,0.0,0.0,-0.104972,...,0.004525,0.01083,0.0,-0.031423,0.0,-0.029412,-0.011329,0.0,0.0,0.0
2001-01-08,0.0,-0.032917,0.015094,0.0,0.0,0.01145,0.060942,0.0,0.0,-0.185185,...,-0.004505,0.030357,0.0,-0.005725,0.0,-0.015152,0.002083,0.0,0.017647,0.0


In [3]:
def mask_pre_ipo(df):
    """Replace each column's leading run of zeros with NaN.

    A column's history is taken to start at its first entry that is neither
    0 nor NaN. Every row before that entry is set to NaN; the entry itself and
    everything after it (including later zeros) is returned unchanged. A column
    that never has such an entry comes back entirely NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to mask, one series per column. It is not modified.

    Returns
    -------
    pandas.DataFrame
        New frame with the same index and columns as ``df``.
    """
    # True where a cell holds an actual observation. NaN != 0 evaluates to
    # True in pandas, so missing cells have to be excluded explicitly.
    has_value = df.ne(0) & df.notna()

    # Running count of observations down the rows: 0 before the first one,
    # positive from the first one onwards.
    started = has_value.cumsum().gt(0)

    # Keep cells at/after the first observation, NaN everywhere before it.
    # (A label slice such as .loc[:first_label] would also blank the first
    # observation itself, because label slices include their end point.)
    return df.where(started)

def safe_rolling_zscore(df, window, eps=0.0):
    """Rolling z-score of every column, with undefined scores mapped to 0.

    Each cell becomes ``(value - rolling_mean) / rolling_std``, where both
    statistics come from ``df.rolling(window)``. Wherever that score is NaN
    (for example the rolling statistics are not available yet, the input cell
    is NaN, or the rolling std is not above ``eps``) the result is 0 instead.

    Parameters
    ----------
    df : pandas.DataFrame
        Input values, one series per column.
    window : int or offset
        Window specification forwarded unchanged to ``DataFrame.rolling``.
    eps : float, default 0.0
        Rolling std values ``<= eps`` are treated as "no dispersion" and
        produce a z-score of 0. The default only catches an exact 0; pass a
        small positive value to also catch std values that are non-zero
        purely because of floating-point round-off.

    Returns
    -------
    pandas.DataFrame
        Frame shaped like ``df`` containing the z-scores, with NaN filled by 0.
    """
    # Build the window object once and derive both statistics from it.
    roller = df.rolling(window)
    rolling_mean = roller.mean()
    rolling_std = roller.std()

    # Blank out unusable denominators so the division yields NaN rather than
    # +/-inf; NaN std values fail the comparison and stay NaN as well.
    usable_std = rolling_std.where(rolling_std > eps)

    z = (df - rolling_mean) / usable_std
    return z.fillna(0)


# Feature pipeline
# ----------------
# Step 1: rebind `returns` to the frame produced by mask_pre_ipo, so every
# later use of `returns` sees the masked table rather than the raw CSV values.
returns = mask_pre_ipo(returns)

# Steps 2 and 3: 60-row rolling z-score of each column, then a defensive
# clean-up that maps any +/-inf and any remaining NaN to 0.
features = (
    safe_rolling_zscore(returns, window=60)
    .replace([np.inf, -np.inf], 0)
    .fillna(0)
)
