# Data Exploration

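Explore and analyze downloaded stock data: load daily prices for several symbols, compare normalized closing prices, inspect correlations, run basic data quality checks, and build a statistical summary.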

In [None]:
import sys
sys.path.insert(0, '..')

from src.data_storage import DataStorage
from src.visualizer import plot_multiple_symbols, plot_correlation_matrix
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## Load Multiple Stocks

In [None]:
# Load daily price history for each symbol of interest via DataStorage.
storage = DataStorage()
symbols = ['AAPL', 'GOOGL', 'MSFT', 'AMZN', 'TSLA']

# Maps symbol -> DataFrame; only symbols that actually have rows are kept.
data_dict = {}
for symbol in symbols:
    df = storage.load_daily(symbol, start_date='2020-01-01')
    # Skip symbols with nothing usable: a missing result (None) or a frame
    # with zero rows carries no prices for the later cells to plot or
    # summarize. Report the skip so a gap in the data is visible.
    if df is None or df.empty:
        print(f"{symbol}: no data available, skipping")
        continue
    data_dict[symbol] = df
    print(f"{symbol}: {len(df)} records")

## Compare Stock Prices (Normalized)

In [None]:
# Draw the 'Close' column of every loaded symbol on one figure, with
# normalization switched on, then render it.
fig = plot_multiple_symbols(
    data_dict,
    column='Close',
    normalize=True,
)
plt.show()

## Correlation Analysis

In [None]:
# Build the correlation-matrix figure from the 'Close' column of the
# loaded symbols, then render it.
fig = plot_correlation_matrix(
    data_dict,
    column='Close',
)
plt.show()

## Data Quality Checks

In [None]:
# Print a short data-quality report for each loaded symbol.
for symbol, df in data_dict.items():
    print(f"\n{symbol}:")

    # Earliest and latest index values covered by this frame.
    first_date, last_date = df.index.min(), df.index.max()
    print(f"  Date range: {first_date} to {last_date}")

    record_count = len(df)
    print(f"  Records: {record_count}")

    # Total number of null cells across all columns.
    missing_total = df.isna().sum().sum()
    print(f"  Missing values: {missing_total}")

    close = df['Close']
    print(f"  Price range: ${close.min():.2f} - ${close.max():.2f}")

## Statistical Summary

In [None]:
def _summary_row(ticker, frame):
    """Return one summary record: row count, Close min/max/mean, mean Volume."""
    close = frame['Close']
    return {
        'Symbol': ticker,
        'Records': len(frame),
        'Min Price': close.min(),
        'Max Price': close.max(),
        'Avg Price': close.mean(),
        'Avg Volume': frame['Volume'].mean(),
    }


# One record per loaded symbol, in data_dict iteration order.
summary_data = [_summary_row(symbol, df) for symbol, df in data_dict.items()]

# Assemble the records into a table; the bare name displays it in the notebook.
summary_df = pd.DataFrame(summary_data)
summary_df