# Feature Visualization Template

Use this notebook to visualize and validate computed features.

## Setup

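Run this notebook after computing features. From the project root (the parent of this notebook's directory, which is where `../artifacts/` resolves to), run: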
```bash
python -m src.cli.compute --output artifacts/
```

In [None]:
import sys
sys.path.insert(0, '..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from src.pipelines.visualization import FeatureVisualizer, quick_feature_check

%matplotlib inline
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*'; older
# releases only ship the un-prefixed name. Use whichever this install provides
# and keep Matplotlib's default style if neither is present, so the setup cell
# never fails just because of a style name.
for _style in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    if _style in plt.style.available:
        plt.style.use(_style)
        break

## Load Features

In [None]:
# Read the daily feature table from the artifacts directory
df = pd.read_parquet('../artifacts/features_daily.parquet')

# Headline numbers for a quick sanity check of what was loaded
n_rows, n_cols = df.shape
print(f"Rows: {n_rows:,}")
print(f"Symbols: {df['symbol'].nunique()}")
dates = df['date']
print(f"Date range: {dates.min()} to {dates.max()}")
print(f"Features: {n_cols - 2}")  # every column except the symbol/date keys

In [None]:
# Initialize visualizer: wrap the loaded feature frame in the project's
# FeatureVisualizer. The resulting `viz` object is what the coverage,
# plotting and exploration cells below call into.
viz = FeatureVisualizer(df)

## Feature Coverage Analysis

Check for NaN values across features.

In [None]:
# Summary of NaN percentages per feature
# NOTE(review): the "highest NaN rates" label below presumes coverage_summary()
# returns its rows already sorted by NaN rate, descending — confirm in
# FeatureVisualizer before trusting the ordering.
coverage = viz.coverage_summary()
print("Features with highest NaN rates:")
# Bare last expression so the notebook renders the first 20 rows as a table.
coverage.head(20)

In [None]:
# Features whose `nan_pct_mean` exceeds the threshold (may need investigation).
# The threshold is named once so the filter and the printed label cannot drift
# apart when it is tuned.
NAN_PCT_THRESHOLD = 20  # percent, compared against coverage['nan_pct_mean']

high_nan = coverage[coverage['nan_pct_mean'] > NAN_PCT_THRESHOLD]
print(f"Features with >{NAN_PCT_THRESHOLD}% NaN: {len(high_nan)}")
# Bare last expression so the notebook renders the flagged rows as a table.
high_nan

## Single Stock Analysis

In [None]:
# Analysis parameters: edit these, then re-run the cells below.
SYMBOL = 'AAPL'            # ticker to analyze
START_DATE = '2023-01-01'  # start of the plotted window
END_DATE = None            # None for latest

# Core feature columns used by the stats check and the single-stock plot
FEATURES = ['rsi_14', 'macd_histogram', 'vol_regime', 'trend_score_granular']

In [None]:
# Quick stats check for SYMBOL over the FEATURES chosen in the config cell.
# NOTE(review): what quick_feature_check prints or returns is defined in
# src.pipelines.visualization and is not visible from this notebook.
quick_feature_check(df, SYMBOL, FEATURES)

In [None]:
# Draw the configured features for one symbol over the configured date window
single_stock_kwargs = {
    'symbol': SYMBOL,
    'features': FEATURES,
    'start_date': START_DATE,
    'end_date': END_DATE,
    'figsize': (14, 12),
}
fig = viz.plot_single_stock(**single_stock_kwargs)
plt.show()

## Feature Distributions

In [None]:
# RSI distribution sanity check: values are expected to lie in 0-100 and to be
# centered around 50. A plot that departs from that is worth investigating
# before the feature is used.
fig = viz.plot_feature_distribution('rsi_14', bins=50)
plt.show()

In [None]:
# MACD histogram distribution sanity check: values are expected to be centered
# around 0. A clearly off-center plot is worth investigating before the
# feature is used.
fig = viz.plot_feature_distribution('macd_histogram', bins=50)
plt.show()

## Feature Correlations

In [None]:
# Candidate features for the correlation analysis
corr_candidates = [
    'rsi_14', 'rsi_21', 'rsi_30',
    'macd_histogram',
    'vol_regime',
    'trend_score_granular',
    'dist_sma20_z', 'dist_sma50_z',
    'ATR14'
]

# Keep only the candidates present in df, but report the ones that were
# dropped: a silently shrinking matrix hides renamed or never-computed features.
corr_features = [f for f in corr_candidates if f in df.columns]
missing_corr = [f for f in corr_candidates if f not in df.columns]
if missing_corr:
    print(f"Skipping features not found in df: {missing_corr}")

# A correlation matrix needs at least two columns to show anything useful, so
# say so explicitly instead of handing an empty/singleton list to the plotter.
if len(corr_features) >= 2:
    fig = viz.plot_correlation_matrix(corr_features, method='spearman')
    plt.show()
else:
    print("Not enough features available for a correlation matrix.")

## Interactive Exploration (Plotly)

Requires plotly: `pip install plotly`

In [None]:
# Interactive explorer for SYMBOL. This cell is best-effort: it never stops the
# notebook, but it distinguishes a missing plotly install from any other
# failure so that e.g. an unknown symbol, a missing feature column or a
# renderer problem is not misreported as a missing dependency.
try:
    fig = viz.interactive_stock_explorer(
        symbol=SYMBOL,
        features=['rsi_14', 'macd_histogram', 'vol_regime'],
        start_date=START_DATE
    )
    fig.show()
except ImportError as e:
    # ModuleNotFoundError is a subclass of ImportError, so this covers the
    # plain "plotly is not installed" case.
    # NOTE(review): assumes the visualizer lets the ImportError propagate when
    # plotly is missing — confirm in FeatureVisualizer.
    print(f"Plotly not available: {e}")
    print("Install with: pip install plotly")
except Exception as e:
    # Anything else is a real problem with the data or the call; report it
    # under its own name rather than suggesting a reinstall.
    print(f"Interactive explorer failed ({type(e).__name__}): {e}")

## Weekly Features (if computed)

In [None]:
# List weekly features: ask the visualizer for the features selected by the
# 'w_' prefix and report how many came back.
weekly_features = viz.get_features(prefix='w_')
print(f"Weekly features available: {len(weekly_features)}")
# Preview at most the first 10 entries, and only when there are any.
if weekly_features:
    print(weekly_features[:10])

In [None]:
# Compare daily vs weekly RSI for SYMBOL. Skipped when the weekly column was
# not computed. Uses the same START_DATE/END_DATE window as the single-stock
# plot so that changing END_DATE in the config cell affects both figures.
if 'w_rsi_14' in df.columns:
    fig = viz.plot_single_stock(
        symbol=SYMBOL,
        features=['rsi_14', 'w_rsi_14'],
        start_date=START_DATE,
        end_date=END_DATE,
        figsize=(14, 8)
    )
    plt.show()

## Targets Analysis (if computed)

In [None]:
# Load the triple-barrier targets if the file exists and show how the `hit`
# labels are distributed; otherwise explain how to produce the file.
try:
    targets = pd.read_parquet('../artifacts/targets_triple_barrier.parquet')
except FileNotFoundError:
    print("Targets file not found. Run compute with targets enabled.")
else:
    print(f"Targets loaded: {len(targets):,} trajectories")
    print(f"\nClass distribution:")
    hit_counts = targets['hit'].value_counts().sort_index()
    print(hit_counts)
    print(f"\n-1: Lower barrier, 0: Time expired, 1: Upper barrier")

## Custom Analysis

Add your own analysis below.

In [None]:
# Your analysis here
