# 03 - Feature Engineering

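This notebook computes spot-price EMAs, aggregates option metrics, calculates option Greeks, and merges the spot, futures, and option data into a final set of derived features.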

In [None]:
import sys
sys.path.append('../src')

from features import calculate_emas, process_greeks, calculate_derived_features, aggregate_option_metrics
import pandas as pd
import numpy as np

In [None]:
data_dir = '../data'

# Read the three cleaned 5-minute datasets (spot, futures, options).
spot_df = pd.read_csv(f'{data_dir}/nifty_spot_5min_clean.csv')
futures_df = pd.read_csv(f'{data_dir}/nifty_futures_5min_clean.csv')
options_df = pd.read_csv(f'{data_dir}/nifty_options_5min_clean.csv')

# Tag the spot open/close columns with a `_spot` suffix.
spot_df = spot_df.rename(columns={'open': 'open_spot', 'close': 'close_spot'})

# CSV timestamps arrive as plain strings; parse them to datetimes so the
# later merges on 'timestamp' compare like with like.
for frame in (spot_df, futures_df, options_df):
    frame['timestamp'] = pd.to_datetime(frame['timestamp'])

In [None]:
# Add fast (5-period) and slow (15-period) EMA columns to the spot data.
spot_df = calculate_emas(spot_df, period_fast=5, period_slow=15)

print("EMAs calculated:")

# Preview the close price next to both EMAs for the first 20 bars.
ema_preview_cols = ['timestamp', 'close_spot', 'ema_5', 'ema_15']
spot_df[ema_preview_cols].head(20)

In [None]:
# Collapse the raw option rows into aggregated metrics via the project helper.
agg_options = aggregate_option_metrics(options_df)

print("Aggregated Option Metrics:")

# Show the first five aggregated rows as a sanity check.
agg_options.head(5)

In [None]:
# Attach the spot close to every option row so the Greeks calculation has the
# underlying price available. The join is an inner join on 'timestamp', so
# option rows without a matching spot bar are dropped.
options_with_spot = pd.merge(
    options_df,
    spot_df[['timestamp', 'close_spot']],
    on='timestamp',
    how='inner',
)

# Only the first rows are processed to keep the notebook fast.
# NOTE: this is a head-of-frame sample, not a random one.
GREEKS_SAMPLE_ROWS = 1000

# Calculate Greeks (sample for speed)
print("Calculating Greeks (this may take a while)...")

# `.head()` returns a slice of the merged frame; take an explicit copy so that
# any column assignment inside `process_greeks` operates on an independent
# frame instead of triggering pandas' SettingWithCopyWarning.
greeks_sample = options_with_spot.head(GREEKS_SAMPLE_ROWS).copy()
options_greeks = process_greeks(greeks_sample)

# Preview the contract identifiers alongside the computed Greeks.
options_greeks[['strike', 'type', 'delta', 'gamma', 'theta', 'vega']].head()

In [None]:
# Futures columns needed downstream; the futures close is renamed to
# 'close_fut' so it is distinguishable from the spot close.
futures_features = futures_df[['timestamp', 'close', 'oi']].rename(columns={'close': 'close_fut'})

# Make sure the aggregated option timestamps are datetimes before joining.
agg_options['timestamp'] = pd.to_datetime(agg_options['timestamp'])

# Start from a copy of the spot frame, then join futures (inner join: only
# timestamps present in both) and aggregated options (left join: keep every
# spot/futures row even when no option aggregate exists).
final_df = spot_df.copy()
final_df = final_df.merge(futures_features, on='timestamp')
final_df = final_df.merge(agg_options, on='timestamp', how='left')

# Add the derived features computed by the project helper.
final_df = calculate_derived_features(final_df)

print(f"Final Feature Set: {final_df.shape}")
final_df.head()

In [None]:
# Feature Summary: list every column of the final frame with its dtype.
print("=== Feature Columns ===")

# Iterate the dtypes Series directly instead of looking each column up by
# label: it is a single pass and still works if a column label is duplicated
# (where `final_df[col]` would return a DataFrame with no `.dtype`).
for col, dtype in final_df.dtypes.items():
    print(f"- {col}: {dtype}")