In [1]:
import pandas as pd
import numpy as np

In [None]:
%%time
import torch

In [2]:
# Load the raw return file; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='data/macro_index_returns.csv')


In [3]:
# Keep only the fields the later cells read: the return, its date and the ticker.
columns = ["RETX", "date", "TICKER"]
df = df.loc[:, columns]

In [4]:
def convert_to_number(x):
    """Parse ``x`` as a float, returning ``None`` when it is not numeric.

    Parameters
    ----------
    x : object
        Any value accepted by ``float()`` (numbers, numeric strings, ...).

    Returns
    -------
    float or None
        ``float(x)`` on success; ``None`` when the conversion fails because
        ``x`` has the wrong type, is not a numeric string, or is too large
        to be represented as a float.
    """
    try:
        return float(x)
    # Catch only conversion failures. A bare ``except:`` would also swallow
    # KeyboardInterrupt / SystemExit and hide genuine programming errors.
    except (TypeError, ValueError, OverflowError):
        return None

# Add a numeric copy of RETX; entries that cannot be parsed as a float become missing values.
df = df.assign(ret_parsed=df['RETX'].apply(convert_to_number))

In [6]:
# Convert the "date" column to datetimes, promote it to the row index and
# discard the raw RETX column (the parsed values live in ret_parsed).
df = (
    df.assign(date=pd.to_datetime(df["date"]))
      .set_index("date")
      .drop(columns=["RETX"])
)

In [8]:
# Reshape from long to wide: one row per date, one column per ticker.
# When a (date, ticker) pair occurs more than once, the first value is kept.
df = pd.pivot_table(
    df,
    values="ret_parsed",
    index=df.index,
    columns="TICKER",
    aggfunc="first",
)

In [9]:
# Treat every missing return as a zero return.
df = df.fillna(value=0)

In [12]:
from data_preprocess import safe_rolling_zscore

# 2. Compute the stable rolling z over a 60-row window.
#    NOTE(review): safe_rolling_zscore comes from data_preprocess and is not
#    shown here; its exact handling of warm-up rows should be confirmed there.
features = safe_rolling_zscore(df, window=60)

# 3. Neutralise anything non-finite: +/-inf first, then whatever NaN remains.
features = features.replace(to_replace=[np.inf, -np.inf], value=0)
features = features.fillna(value=0)

In [13]:
# Show the cleaned feature frame (a bare last expression renders it as a table).
features

TICKER,GLD,IEF,SPY,USO,UUP
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2008-01-02,0.000000,0.000000,0.000000,0.000000,0.000000
2008-01-03,0.000000,0.000000,0.000000,0.000000,0.000000
2008-01-04,0.000000,0.000000,0.000000,0.000000,0.000000
2008-01-07,0.000000,0.000000,0.000000,0.000000,0.000000
2008-01-08,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...
2024-12-24,0.197975,0.376423,1.317603,0.355554,0.295191
2024-12-26,0.666938,0.428528,-0.116051,-0.386350,-0.388157
2024-12-27,-0.641424,-0.382519,-1.439297,0.486069,-0.324800
2024-12-30,-0.281215,1.592691,-1.502935,0.703788,0.081034


In [14]:
# 60-row rolling standard deviation of each column, scaled by sqrt(252)
# (the usual daily-to-annual volatility conversion).
rolling_vol = df.rolling(window=60).std().mul(np.sqrt(252))

In [15]:
# Show the rolling volatility; the first rows are NaN because rolling(60) needs 60 observations.
rolling_vol 

TICKER,GLD,IEF,SPY,USO,UUP
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2008-01-02,,,,,
2008-01-03,,,,,
2008-01-04,,,,,
2008-01-07,,,,,
2008-01-08,,,,,
...,...,...,...,...,...
2024-12-24,0.164410,0.065835,0.124132,0.313311,0.108526
2024-12-26,0.163561,0.065841,0.122465,0.307293,0.108415
2024-12-27,0.164143,0.065813,0.124707,0.307397,0.108249
2024-12-30,0.164252,0.066733,0.127087,0.296919,0.108113


In [None]:
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

# ------------------------------------
# Example: build features from returns
# ------------------------------------
# Produces three aligned float32 arrays, one row per usable decision day t:
#   X   : (N, window * n_assets)  flattened returns for days t-window+1 .. t
#   vol : (N, n_assets)           rolling_vol on day t
#   y   : (N, n_assets)           returns on day t+1 (the prediction target)
window = 20  # number of past days you want to use

asset_cols = df.columns  # 5 assets
assert list(asset_cols) == list(rolling_vol.columns)

# Extract the raw arrays once; calling .iloc[...].values inside the loop is
# needlessly slow and yields the same numbers.
returns_arr = df.to_numpy(dtype=np.float32)        # (T, n_assets)
vol_arr = rolling_vol.to_numpy(dtype=np.float32)   # (T, n_assets)
n_assets = returns_arr.shape[1]

X_list = []
vol_list = []
y_list = []

# We use t as the "decision" day: everything known at the close of day t
# (returns up to and including t, vol on day t) is used to predict t+1.
for t in range(window, len(df) - 1):
    vol_t = vol_arr[t]

    # rolling_vol is NaN until its own look-back window is full, which is
    # longer than `window`. Skip those days so no NaN reaches `vol`.
    if np.isnan(vol_t).any():
        continue

    # Past window of returns ending ON day t: shape (window, n_assets) -> flatten.
    # (Slicing t-window:t would silently drop day t's return even though
    # vol_t already reflects it.)
    past_ret = returns_arr[t - window + 1:t + 1]

    X_list.append(past_ret.flatten())   # (window * n_assets,)
    vol_list.append(vol_t)              # current vol for day t (n_assets,)
    y_list.append(returns_arr[t + 1])   # next-day returns (n_assets,)

# reshape(-1, k) keeps the arrays 2-D even when no sample qualified.
X = np.array(X_list, dtype=np.float32).reshape(-1, window * n_assets)   # (N, window*5)
vol = np.array(vol_list, dtype=np.float32).reshape(-1, n_assets)        # (N, 5)
y = np.array(y_list, dtype=np.float32).reshape(-1, n_assets)            # (N, 5)
