In [32]:
import pandas as pd
import numpy as np
import os
import ta

# Constants
# Directory the per-ticker input parquet files are read from; the generated
# feature files are written back to this same directory.
DATA_DIR = "../data"
# Ticker symbols to process; each one is loaded from "<DATA_DIR>/<ticker>.parquet".
TICKERS = ['SPY', 'QQQ', 'IWM']
# Prefix placed before the ticker symbol when naming each output feature file.
SAVE_PREFIX = "features_"

def compute_features(df):
    """Derive price-based features and a next-period direction label.

    The input is copied, its column labels are flattened to plain strings,
    the date column becomes a sorted ``DatetimeIndex`` while features are
    computed, and the index is turned back into a regular column on return.

    Added columns:
        daily_return        percentage change of the price column
        rolling_vol_5       5-period rolling std of ``daily_return``
        sma_10, sma_20      10- and 20-period rolling means of the price
        rsi_14              14-period RSI from ``ta.momentum.RSIIndicator``
        momentum_10         price minus the price 10 periods earlier
        ret_1               previous period's ``daily_return``
        price_above_sma_10  1 if price > ``sma_10`` else 0
        bb_width            upper minus lower Bollinger band (window 20, 2 dev)
        target              1 if the NEXT period's return is > 0 else 0

    Args:
        df: Frame with a column whose name starts with "date" and a column
            whose name contains "close" (both matched case-insensitively,
            after label flattening).

    Returns:
        A new DataFrame with the feature columns and ``target`` added, every
        row containing a NaN removed, and the date restored as a column.

    Raises:
        KeyError: If no date column or no close-price column can be found.
    """
    df = df.copy()

    # Flatten stringified tuple labels, e.g. "('Close', 'SPY')" -> "Close_SPY".
    # str() makes this also work when the labels are real tuples / non-strings;
    # for labels that are already strings it is a no-op.
    df.columns = [
        str(col).replace("('", "").replace("')", "").replace("', '", "_")
        for col in df.columns
    ]

    # Identify the Date column (after flattening it is likely 'Date_')
    date_col = next((col for col in df.columns if col.lower().startswith("date")), None)
    if date_col is None:
        raise KeyError(f"Could not locate date column. Got columns: {df.columns.tolist()}")

    df[date_col] = pd.to_datetime(df[date_col])
    df.set_index(date_col, inplace=True)
    # Rolling windows and shifts below rely on chronological row order.
    df.sort_index(inplace=True)

    # Use the first column whose name mentions "close". Fail loudly here
    # instead of letting df[None] raise an uninformative KeyError later.
    price_col = next((c for c in df.columns if "close" in c.lower()), None)
    if price_col is None:
        raise KeyError(f"Could not locate close price column. Got columns: {df.columns.tolist()}")
    price = df[price_col]

    # Feature engineering
    df['daily_return'] = price.pct_change()
    df['rolling_vol_5'] = df['daily_return'].rolling(window=5).std()
    df['sma_10'] = price.rolling(window=10).mean()
    df['sma_20'] = price.rolling(window=20).mean()
    df['rsi_14'] = ta.momentum.RSIIndicator(price, window=14).rsi()
    df['momentum_10'] = price - price.shift(10)

    # Lagged return: the previous period's return as seen from each row.
    df['ret_1'] = df['daily_return'].shift(1)

    df['price_above_sma_10'] = (price > df['sma_10']).astype(int)

    bb = ta.volatility.BollingerBands(close=price, window=20, window_dev=2)
    df['bb_width'] = bb.bollinger_hband() - bb.bollinger_lband()

    # Label: did the price rise over the following period?
    next_return = df['daily_return'].shift(-1)
    df['target'] = (next_return > 0).astype(int)

    # `NaN > 0` is False, so rows with no known next return (always the last
    # row) would otherwise get a fabricated target of 0 and survive dropna()
    # because the int column holds no NaN. Remove them explicitly.
    df = df.loc[next_return.notna()]

    # Drop warm-up rows where rolling/shifted features are still NaN, then
    # expose the date as an ordinary column again.
    return df.dropna().reset_index()

# Build and save one feature file per configured ticker
for ticker in TICKERS:
    # Raw input and generated features both live under DATA_DIR;
    # only the file name differs (the output carries SAVE_PREFIX).
    input_path = os.path.join(DATA_DIR, f"{ticker}.parquet")
    output_path = os.path.join(DATA_DIR, f"{SAVE_PREFIX}{ticker}.parquet")

    df = pd.read_parquet(input_path)

    print(f"🔧 Processing {ticker}")
    features_df = compute_features(df)

    # index=False: the frame's index is not written to the parquet file.
    features_df.to_parquet(output_path, index=False)
    print(f"✅ Saved to {output_path}\n")

print("🎉 All tickers processed.")


🔧 Processing SPY
✅ Saved to ../data/features_SPY.parquet

🔧 Processing QQQ
✅ Saved to ../data/features_QQQ.parquet

🔧 Processing IWM
✅ Saved to ../data/features_IWM.parquet

🎉 All tickers processed.
