In [2]:
# SPX Index Volatility Analysis

**Comprehensive analysis of S&P 500 Index realized and implied volatility from July 2022 to July 2025**

---

## Setup and Data Loading

```python
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# Global plotting defaults: seaborn-flavoured matplotlib theme, HUSL palette,
# and a larger default figure size for every matplotlib figure created later.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
plt.rcParams.update({'figure.figsize': (12, 8)})

# Notebook banner: title followed by a 50-character rule.
print("📊 SPX Index Volatility Analysis", "=" * 50, sep="\n")
```

```python
# Load the historical volatility data.
# NOTE(review): this is a machine-specific absolute path; adjust it when the
# notebook is run on another machine.
data_path = r'C:\Users\acmuser\PycharmProjects\BloombergData\data\historical_volatility\historical_volatility_latest.csv'
df = pd.read_csv(data_path)

# Convert date to datetime
df['date'] = pd.to_datetime(df['date'])

# Filter for SPX Index only (copied so later column additions never touch df)
spx_data = df[df['ticker'] == 'SPX Index'].copy()

# Fail early with a clear message: on an empty frame the date-range line below
# would otherwise blow up inside NaT.strftime after announcing a successful load.
if spx_data.empty:
    raise ValueError(f"No rows with ticker 'SPX Index' found in {data_path}")

print(f"✅ Data loaded successfully")
print(f"   Total SPX observations: {len(spx_data):,}")
print(f"   Date range: {spx_data['date'].min().strftime('%Y-%m-%d')} to {spx_data['date'].max().strftime('%Y-%m-%d')}")
print(f"   Data types: {spx_data['data_type'].unique()}")
```

---

## 1. Data Overview and Quality Check

```python
# Overview banner
print("🔍 SPX DATA OVERVIEW")
print("=" * 30)

# One frame per data_type; each is a copy so later column additions stay local.
is_realized = spx_data['data_type'] == 'realized'
is_implied = spx_data['data_type'] == 'implied'
spx_realized = spx_data.loc[is_realized].copy()
spx_implied = spx_data.loc[is_implied].copy()

print(f"Realized volatility observations: {len(spx_realized):,}")
print(f"Implied volatility observations: {len(spx_implied):,}")

# Columns holding the volatility readings for each data type.
realized_cols = ['realized_vol_30d', 'realized_vol_90d', 'realized_vol_180d', 'realized_vol_252d']
implied_cols = ['implied_vol_1m_atm', 'implied_vol_3m_atm', 'implied_vol_6m_atm', 'implied_vol_12m_atm']

# Completeness report: non-null count and share per column, realized first.
for heading, frame, columns in (
    ("\n📈 REALIZED VOLATILITY DATA QUALITY", spx_realized, realized_cols),
    ("\n📉 IMPLIED VOLATILITY DATA QUALITY", spx_implied, implied_cols),
):
    print(heading)
    total = len(frame)
    for col in columns:
        non_null = frame[col].notna().sum()
        pct = (non_null / total) * 100
        print(f"   {col}: {non_null:,}/{total:,} ({pct:.1f}%)")
```

```python
# Sample data preview: first five rows of each frame, printed without the index.
print("\n📊 SAMPLE DATA")
for caption, frame, value_cols in (
    ("\nRealized Volatility Sample:", spx_realized, realized_cols),
    ("\nImplied Volatility Sample:", spx_implied, implied_cols),
):
    print(caption)
    preview = frame[['date'] + value_cols].head()
    print(preview.to_string(index=False))
```

---

## 2. Realized Volatility Time Series Analysis

```python
# Create comprehensive realized volatility chart: a 2x2 grid with one panel
# per look-back window (30d, 90d, 180d, 252d).
fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=('30-Day Realized Volatility', '90-Day Realized Volatility', 
                   '180-Day Realized Volatility', '252-Day Realized Volatility'),
    vertical_spacing=0.1
)

# One colour per panel; the same list is reused by the implied-vol chart.
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728']
vol_periods = ['30d', '90d', '180d', '252d']

for i, (period, color) in enumerate(zip(vol_periods, colors)):
    # Fill the grid row by row: i = 0, 1 -> row 1; i = 2, 3 -> row 2.
    row = (i // 2) + 1
    col = (i % 2) + 1
    
    col_name = f'realized_vol_{period}'
    
    fig.add_trace(
        go.Scatter(
            x=spx_realized['date'],
            y=spx_realized[col_name],
            mode='lines',
            name=f'{period.upper()} Realized Vol',
            line=dict(color=color, width=2),
            # Only the first piece is an f-string; %{x} / %{y} are plotly
            # placeholders and must stay literal.
            hovertemplate=f'<b>{period.upper()} Realized Vol</b><br>' +
                         'Date: %{x}<br>' +
                         'Volatility: %{y:.2f}%<extra></extra>'
        ),
        row=row, col=col
    )

fig.update_layout(
    title='SPX Index Realized Volatility Term Structure (2022-2025)',
    height=800,
    showlegend=False,
    template='plotly_white'
)

# Link the x-axes of all four panels. The range selector/slider below is
# attached to `xaxis`, i.e. the top-left panel only; without matching, its
# buttons would zoom that single panel and leave the other three untouched.
fig.update_xaxes(matches='x')

# Add range selector
fig.update_layout(
    xaxis=dict(
        rangeselector=dict(
            buttons=list([
                dict(count=6, label="6M", step="month", stepmode="backward"),
                dict(count=1, label="1Y", step="year", stepmode="backward"),
                dict(count=2, label="2Y", step="year", stepmode="backward"),
                dict(step="all")
            ])
        ),
        rangeslider=dict(visible=True),
        type="date"
    )
)

fig.show()
```

```python
# Descriptive statistics and pairwise correlations of the realized-vol windows.
print("📊 REALIZED VOLATILITY SUMMARY STATISTICS")
print("=" * 50)

realized_values = spx_realized[realized_cols]

# describe() output, rounded to 2 decimals for display only.
summary_stats = realized_values.describe()
print(summary_stats.round(2))

# Correlation matrix between the look-back windows.
print("\n🔗 REALIZED VOLATILITY CORRELATIONS")
correlations = realized_values.corr()
print(correlations.round(3))
```

---

## 3. Implied Volatility Time Series Analysis

```python
# Implied-volatility chart: 2x2 grid with one panel per ATM tenor, reusing the
# `colors` list defined for the realized-volatility chart.
implied_periods = ['1m', '3m', '6m', '12m']

fig = make_subplots(
    rows=2,
    cols=2,
    subplot_titles=(
        '1-Month Implied Vol (ATM)',
        '3-Month Implied Vol (ATM)',
        '6-Month Implied Vol (ATM)',
        '12-Month Implied Vol (ATM)',
    ),
    vertical_spacing=0.1,
)

for i, (period, color) in enumerate(zip(implied_periods, colors)):
    # divmod gives the zero-based (row, col) of panel i in a 2-wide grid.
    grid_row, grid_col = divmod(i, 2)
    label = f'{period.upper()} Implied Vol'

    trace = go.Scatter(
        x=spx_implied['date'],
        y=spx_implied[f'implied_vol_{period}_atm'],
        mode='lines',
        name=label,
        line={'color': color, 'width': 2},
        # %{x} / %{y} are plotly placeholders, so those pieces are plain
        # (non-f) string literals.
        hovertemplate=(
            f'<b>{label}</b><br>'
            'Date: %{x}<br>'
            'Volatility: %{y:.2f}%<extra></extra>'
        ),
    )
    fig.add_trace(trace, row=grid_row + 1, col=grid_col + 1)

fig.update_layout(
    title='SPX Index Implied Volatility Term Structure (2022-2025)',
    height=800,
    showlegend=False,
    template='plotly_white',
)

fig.show()
```

```python
# Descriptive statistics and pairwise correlations of the implied-vol tenors.
print("📊 IMPLIED VOLATILITY SUMMARY STATISTICS")
print("=" * 50)

implied_values = spx_implied[implied_cols]

# describe() output, rounded to 2 decimals for display only.
summary_stats = implied_values.describe()
print(summary_stats.round(2))

# Correlation matrix between the tenors.
print("\n🔗 IMPLIED VOLATILITY CORRELATIONS")
correlations = implied_values.corr()
print(correlations.round(3))
```

---

## 4. Realized vs Implied Volatility Comparison

```python
# Merge realized and implied data for comparison.
# 90-day realized is paired with 3-month implied as the nearest-horizon match.
spx_realized_clean = spx_realized[['date', 'realized_vol_90d']].dropna()
spx_implied_clean = spx_implied[['date', 'implied_vol_3m_atm']].dropna()

# Inner join on the exact date: only days that have BOTH a realized and an
# implied reading survive. No nearest-date matching is performed.
comparison_data = pd.merge(spx_realized_clean, spx_implied_clean, on='date', how='inner')

print(f"📈 REALIZED vs IMPLIED COMPARISON")
print(f"   Overlapping observations: {len(comparison_data):,}")
if comparison_data.empty:
    # min()/max() of an empty date column are NaT, which cannot be formatted
    # with strftime; report the absence of overlap instead of crashing.
    print("   Date range: n/a (no overlapping dates)")
else:
    print(f"   Date range: {comparison_data['date'].min().strftime('%Y-%m-%d')} to {comparison_data['date'].max().strftime('%Y-%m-%d')}")
```

```python
# Create realized vs implied comparison chart.
# Left axis (y): the two volatility series. Right axis (y2): their spread.
fig = go.Figure()

# Add realized volatility
fig.add_trace(
    go.Scatter(
        x=comparison_data['date'],
        y=comparison_data['realized_vol_90d'],
        mode='lines',
        name='90-Day Realized Vol',
        line=dict(color='#1f77b4', width=2),
        hovertemplate='<b>90-Day Realized Vol</b><br>' +
                     'Date: %{x}<br>' +
                     'Volatility: %{y:.2f}%<extra></extra>'
    )
)

# Add implied volatility
fig.add_trace(
    go.Scatter(
        x=comparison_data['date'],
        y=comparison_data['implied_vol_3m_atm'],
        mode='lines',
        name='3-Month Implied Vol (ATM)',
        line=dict(color='#ff7f0e', width=2),
        hovertemplate='<b>3-Month Implied Vol</b><br>' +
                     'Date: %{x}<br>' +
                     'Volatility: %{y:.2f}%<extra></extra>'
    )
)

# Add volatility spread (implied minus realized) on the secondary axis
vol_spread = comparison_data['implied_vol_3m_atm'] - comparison_data['realized_vol_90d']
fig.add_trace(
    go.Scatter(
        x=comparison_data['date'],
        y=vol_spread,
        mode='lines',
        name='Vol Spread (Implied - Realized)',
        line=dict(color='#2ca02c', width=2, dash='dash'),
        yaxis='y2',
        hovertemplate='<b>Vol Spread</b><br>' +
                     'Date: %{x}<br>' +
                     'Spread: %{y:.2f}%<extra></extra>'
    )
)

# Add zero line for spread. The spread lives on y2, so the reference line has
# to be anchored to y2 as well; a plain add_hline(y=0) would be placed at 0 on
# the primary volatility axis instead. xref='paper' spans the full plot width.
fig.add_shape(
    type='line',
    xref='paper', x0=0, x1=1,
    yref='y2', y0=0, y1=0,
    line=dict(color='gray', dash='dot'),
    opacity=0.5
)

fig.update_layout(
    title='SPX Index: Realized vs Implied Volatility Comparison',
    xaxis_title='Date',
    yaxis_title='Volatility (%)',
    yaxis2=dict(
        title='Vol Spread (%)',
        overlaying='y',
        side='right'
    ),
    height=600,
    template='plotly_white',
    legend=dict(x=0.02, y=0.98)
)

fig.show()
```

```python
# Spread = 3m implied minus 90d realized, stored on the comparison frame so
# later cells can reuse it.
comparison_data['vol_spread'] = comparison_data['implied_vol_3m_atm'] - comparison_data['realized_vol_90d']
spread = comparison_data['vol_spread']

print("📊 VOLATILITY SPREAD ANALYSIS")
print("=" * 40)

# Central tendency and dispersion of the spread.
for caption, stat in (
    ("Average vol spread", spread.mean),
    ("Median vol spread", spread.median),
    ("Std dev of spread", spread.std),
):
    print(f"{caption}: {stat():.2f}%")

# Extremes, each reported with the date on which it occurred.
for label, value, at in (
    ("Max", spread.max(), spread.idxmax()),
    ("Min", spread.min(), spread.idxmin()),
):
    print(f"{label} spread: {value:.2f}% on {comparison_data.loc[at, 'date'].strftime('%Y-%m-%d')}")

# Share of days on which implied vol exceeded realized vol.
implied_premium_pct = (spread > 0).mean() * 100
print(f"\nImplied vol > Realized vol: {implied_premium_pct:.1f}% of the time")
```

---

## 5. Volatility Regime Analysis

```python
# Define volatility regimes based on 30-day realized vol
def categorize_vol_regime(vol):
    """Map a volatility reading (in percent) to a regime label.

    Returns 'Unknown' for missing values, otherwise the first bucket whose
    exclusive upper bound exceeds ``vol``: below 15 is 'Low Vol', below 25 is
    'Normal Vol', below 35 is 'Elevated Vol', and anything else is 'High Vol'.
    """
    if pd.isna(vol):
        return 'Unknown'

    # (exclusive upper bound, label), checked in ascending order.
    buckets = (
        (15, 'Low Vol'),
        (25, 'Normal Vol'),
        (35, 'Elevated Vol'),
    )
    for upper_bound, label in buckets:
        if vol < upper_bound:
            return label
    return 'High Vol'

# Label every realized-vol row with its regime, based on the 30-day window.
spx_realized['vol_regime'] = spx_realized['realized_vol_30d'].apply(categorize_vol_regime)

# Day count per regime, most frequent first (value_counts ordering).
regime_summary = spx_realized['vol_regime'].value_counts()

print("📊 VOLATILITY REGIME BREAKDOWN")
print("=" * 40)
total_days = len(spx_realized)
for regime, count in regime_summary.items():
    pct = (count / total_days) * 100
    print(f"{regime}: {count:,} days ({pct:.1f}%)")
```

```python
# Regime chart: the 30-day realized vol line with the three regime boundaries.

# Colour assigned to each regime label (not referenced by the traces below).
regime_colors = {
    'Low Vol': '#2ca02c',
    'Normal Vol': '#1f77b4', 
    'Elevated Vol': '#ff7f0e',
    'High Vol': '#d62728'
}

fig = go.Figure()

# 30-day realized volatility as a single black line.
realized_30d_trace = go.Scatter(
    x=spx_realized['date'],
    y=spx_realized['realized_vol_30d'],
    mode='lines',
    name='30-Day Realized Vol',
    line={'color': 'black', 'width': 2},
    hovertemplate=(
        '<b>30-Day Realized Vol</b><br>'
        'Date: %{x}<br>'
        'Volatility: %{y:.2f}%<extra></extra>'
    ),
)
fig.add_trace(realized_30d_trace)

# Dashed horizontal lines at the upper bound of each regime below 'High Vol'.
for level, line_color, caption in (
    (15, "green", "Low Vol Threshold (15%)"),
    (25, "blue", "Normal Vol Threshold (25%)"),
    (35, "orange", "Elevated Vol Threshold (35%)"),
):
    fig.add_hline(y=level, line_dash="dash", line_color=line_color, annotation_text=caption)

fig.update_layout(
    title='SPX Index: Volatility Regime Analysis (30-Day Realized)',
    xaxis_title='Date',
    yaxis_title='Volatility (%)',
    height=600,
    template='plotly_white',
)

fig.show()
```

---

## 6. Term Structure Analysis

```python
# Average realized vol per look-back window, computed separately for each
# regime. Regimes with no rows are skipped entirely.
term_structure_analysis = []

for regime in ('Low Vol', 'Normal Vol', 'Elevated Vol', 'High Vol'):
    regime_data = spx_realized.loc[spx_realized['vol_regime'] == regime]
    if regime_data.empty:
        continue

    # Keys are inserted in display order: regime first, then shortest window
    # to longest.
    entry = {'regime': regime}
    for window in ('30d', '90d', '180d', '252d'):
        entry[window] = regime_data[f'realized_vol_{window}'].mean()
    term_structure_analysis.append(entry)

term_structure_df = pd.DataFrame(term_structure_analysis)

print("📊 AVERAGE TERM STRUCTURE BY REGIME")
print("=" * 50)
print(term_structure_df.round(2))
```

```python
# Term structure chart: one line per regime across the four look-back windows.
periods = ['30d', '90d', '180d', '252d']
period_labels = ['30 Days', '90 Days', '180 Days', '252 Days']

fig = go.Figure()

for _, row in term_structure_df.iterrows():
    regime = row['regime']
    # Average vol for each window, in the same order as period_labels.
    values = [row[window] for window in periods]

    regime_trace = go.Scatter(
        x=period_labels,
        y=values,
        mode='lines+markers',
        name=regime,
        line={'width': 3},
        marker={'size': 8},
        # %{x} / %{y} are plotly placeholders, so those pieces are plain
        # (non-f) string literals.
        hovertemplate=(
            f'<b>{regime}</b><br>'
            'Period: %{x}<br>'
            'Avg Volatility: %{y:.2f}%<extra></extra>'
        ),
    )
    fig.add_trace(regime_trace)

fig.update_layout(
    title='SPX Index: Average Volatility Term Structure by Regime',
    xaxis_title='Time Period',
    yaxis_title='Average Volatility (%)',
    height=600,
    template='plotly_white',
)

fig.show()
```

---

## 7. Key Insights and Summary

```python
# Calculate key metrics for summary.
# "Latest" is the most recent date that actually carries a 30-day reading.
# Taking the max-date row blindly would report nan% / 'Unknown' and a spurious
# 0.0% percentile whenever the newest row is still missing its 30-day value.
# If no row has a reading at all, fall back to the plain max-date row.
has_30d = spx_realized['realized_vol_30d'].notna()
latest_pool = spx_realized[has_30d] if has_30d.any() else spx_realized
latest_date = latest_pool['date'].max()
latest_row = latest_pool[latest_pool['date'] == latest_date].iloc[0]
latest_30d_vol = latest_row['realized_vol_30d']
latest_regime = latest_row['vol_regime']

# Historical percentile: share of non-null history strictly below the latest reading
vol_30d_clean = spx_realized['realized_vol_30d'].dropna()
current_percentile = (vol_30d_clean < latest_30d_vol).mean() * 100

print("🎯 KEY INSIGHTS & SUMMARY")
print("=" * 50)
print(f"📅 Analysis Period: {spx_realized['date'].min().strftime('%B %Y')} to {spx_realized['date'].max().strftime('%B %Y')}")
print(f"📊 Total Trading Days: {len(spx_realized):,}")
print(f"\n🔍 CURRENT VOLATILITY STATUS")
print(f"   Latest 30-day realized vol: {latest_30d_vol:.2f}%")
print(f"   Current regime: {latest_regime}")
print(f"   Historical percentile: {current_percentile:.1f}%")

print(f"\n📈 HISTORICAL VOLATILITY RANGES")
print(f"   30-day realized vol range: {vol_30d_clean.min():.1f}% - {vol_30d_clean.max():.1f}%")
print(f"   Average 30-day vol: {vol_30d_clean.mean():.1f}%")
print(f"   Median 30-day vol: {vol_30d_clean.median():.1f}%")

# Regime shares are relative to all rows, including those classified 'Unknown'
print(f"\n⚡ VOLATILITY REGIME PERIODS")
for regime, count in regime_summary.items():
    if regime != 'Unknown':
        pct = (count / len(spx_realized)) * 100
        print(f"   {regime}: {pct:.1f}% of time")

# The spread figures only exist when realized and implied dates overlap
if len(comparison_data) > 0:
    print(f"\n🎯 IMPLIED vs REALIZED")
    print(f"   Average vol spread: {comparison_data['vol_spread'].mean():.2f}%")
    print(f"   Implied premium {implied_premium_pct:.1f}% of time")

print(f"\n📊 DATA QUALITY")
print(f"   Realized vol completeness: {(spx_realized['realized_vol_30d'].notna().sum() / len(spx_realized) * 100):.1f}%")
if len(spx_implied) > 0:
    print(f"   Implied vol completeness: {(spx_implied['implied_vol_3m_atm'].notna().sum() / len(spx_implied) * 100):.1f}%")
```

---

## Next Steps

This analysis provides the foundation for understanding SPX volatility patterns. Key areas for further research:

1. **Component Analysis** - How individual stocks compare to the index
2. **Event Studies** - Volatility around earnings, FOMC meetings, etc.
3. **Forecasting Models** - Can we predict vol regime changes?
4. **Risk Management** - Position sizing based on vol regimes

**Files for further analysis:**
- Historical volatility database: `historical_volatility_latest.csv`
- Component weights: `spx_weights_latest.csv`

SyntaxError: invalid syntax (2715524201.py, line 5)