# Financial Data Exploratory Data Analysis

This notebook demonstrates comprehensive exploratory data analysis (EDA) for financial data, showcasing data science skills in the finance domain.

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import warnings
# Suppress every warning category for the rest of the kernel session
# (this also hides deprecation notices from the plotting/data libraries).
warnings.filterwarnings(action='ignore')

# Global look-and-feel for the matplotlib / seaborn figures below
plt.style.use('seaborn-v0_8')
sns.set_palette(palette="husl")

## 1. Data Collection and Initial Exploration

In [None]:
# Universe of tickers to analyse and the look-back window handed to yfinance
tickers = ['AAPL', 'GOOGL', 'MSFT', 'TSLA', 'AMZN', 'NVDA', 'META', 'NFLX']
period = '2y'

# Download the price history of every ticker into a dict keyed by symbol
stock_data = {}
for ticker in tickers:
    stock_data[ticker] = yf.Ticker(ticker).history(period=period)
    print(f"Fetched {len(stock_data[ticker])} records for {ticker}")

# Fail fast on empty downloads: a frame with zero rows would otherwise flow
# into the price, return and correlation cells and distort or break them.
# NOTE(review): yfinance appears to return an empty frame (rather than raise)
# for unknown symbols or failed requests - confirm for the installed version.
empty_tickers = [symbol for symbol in tickers if stock_data[symbol].empty]
if empty_tickers:
    raise ValueError(
        f"No price history returned for: {', '.join(empty_tickers)} (period={period!r})"
    )

# Display basic info for AAPL
print("\nAAPL Data Shape:", stock_data['AAPL'].shape)
print("\nAAPL Data Info:")
stock_data['AAPL'].info()

In [None]:
# Preview the first five rows of the AAPL price history
stock_data['AAPL'].head(5)

## 2. Data Quality Assessment

In [None]:
# Report, per ticker, how many null cells its frame contains
print("Missing Values Analysis:")
for ticker in tickers:
    null_counts = stock_data[ticker].isna().sum()
    total_nulls = null_counts.sum()
    print(f"{ticker}: {total_nulls} total missing values")
    if total_nulls > 0:
        # Only list the columns that actually have gaps
        print(null_counts[null_counts > 0])
    print()

In [None]:
# Descriptive statistics for every numeric column of the AAPL frame
print("Statistical Summary for AAPL:")
aapl_frame = stock_data['AAPL']
aapl_frame.describe()

## 3. Price Analysis and Visualization

In [None]:
# One 'Close' column per ticker, aligned on the date index
closing_prices = pd.DataFrame({ticker: stock_data[ticker]['Close'] for ticker in tickers})

# Rebase each series on its first-row close so all lines share a common scale
first_close = closing_prices.iloc[0]
normalized_prices = closing_prices.div(first_close)

# Build one line trace per ticker, in ticker order
price_traces = [
    go.Scatter(
        x=normalized_prices.index,
        y=normalized_prices[ticker],
        mode='lines',
        name=ticker,
        line=dict(width=2),
    )
    for ticker in tickers
]

fig = go.Figure(data=price_traces)
fig.update_layout(
    title='Stock Price Performance Comparison (Normalized)',
    xaxis_title='Date',
    yaxis_title='Normalized Price',
    template='plotly_white',
    height=600,
)

fig.show()

## 4. Returns Analysis

In [None]:
# Calculate daily returns.
# fill_method=None takes the change between consecutive rows as they are; the
# implicit forward-fill default is deprecated in pandas >= 2.1 and would turn a
# missing close into a made-up 0% return. Rows containing any NaN (including
# the first row, which has no previous close) are dropped afterwards.
returns = closing_prices.pct_change(fill_method=None).dropna()

# Display returns statistics
print("Daily Returns Statistics:")
returns_stats = returns.describe()
returns_stats

In [None]:
# Returns distribution visualization: one histogram per ticker on a 4-column
# grid whose row count follows len(tickers), so changing the ticker list does
# not overflow the grid (IndexError) or leave blank axes behind.
n_cols = 4
n_rows = max(1, -(-len(tickers) // n_cols))  # ceiling division, at least one row
fig, axes = plt.subplots(n_rows, n_cols, figsize=(20, 5 * n_rows))
axes = axes.ravel()

for i, ticker in enumerate(tickers):
    axes[i].hist(returns[ticker], bins=50, alpha=0.7, edgecolor='black')
    axes[i].set_title(f'{ticker} Daily Returns Distribution')
    axes[i].set_xlabel('Daily Return')
    axes[i].set_ylabel('Frequency')
    # Dashed red line marks the mean daily return of this ticker
    axes[i].axvline(returns[ticker].mean(), color='red', linestyle='--', label='Mean')
    axes[i].legend()

# Hide grid cells that have no ticker assigned
for unused_ax in axes[len(tickers):]:
    unused_ax.set_visible(False)

plt.tight_layout()
plt.show()

## 5. Volatility Analysis

In [None]:
# Rolling standard deviation of daily returns, scaled by sqrt(252) to annualise
volatility_window = 30
annualization_factor = np.sqrt(252)
rolling_volatility = returns.rolling(window=volatility_window).std() * annualization_factor

# One line per ticker showing how its volatility evolves over time
volatility_traces = [
    go.Scatter(
        x=rolling_volatility.index,
        y=rolling_volatility[ticker],
        mode='lines',
        name=ticker,
    )
    for ticker in tickers
]

fig = go.Figure(data=volatility_traces)
fig.update_layout(
    title=f'{volatility_window}-Day Rolling Volatility (Annualized)',
    xaxis_title='Date',
    yaxis_title='Volatility',
    template='plotly_white',
    height=500,
)

fig.show()

## 6. Correlation Analysis

In [None]:
# Pairwise correlation of daily returns between all tickers
correlation_matrix = returns.corr()

# Annotated heatmap, colour scale centred on zero
heatmap_fig, heatmap_ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap='coolwarm',
    center=0,
    square=True,
    fmt='.3f',
    ax=heatmap_ax,
)
heatmap_ax.set_title('Stock Returns Correlation Matrix')
plt.tight_layout()
plt.show()

print("\nHighest Correlations:")
# Keep only the cells strictly above the diagonal so each pair appears once
# and the self-correlations are excluded
above_diagonal = np.triu(np.ones(correlation_matrix.shape, dtype=bool), k=1)
upper_tri = correlation_matrix.where(above_diagonal)
correlations = upper_tri.stack().sort_values(ascending=False)
print(correlations.head(10))

## 7. Volume Analysis

In [None]:
# Gather each ticker's daily traded volume into a single frame
volumes = pd.DataFrame({ticker: stock_data[ticker]['Volume'] for ticker in tickers})

# Mean daily volume per ticker, largest first
avg_volumes = volumes.mean().sort_values(ascending=False)

# Bar chart of the averages, in the same descending order
volume_bars = go.Bar(
    x=avg_volumes.index,
    y=avg_volumes.values,
    marker_color='lightblue',
)
fig = go.Figure(data=[volume_bars])
fig.update_layout(
    title='Average Daily Trading Volume',
    xaxis_title='Stock',
    yaxis_title='Volume',
    template='plotly_white',
)

fig.show()

# Same numbers as text, with thousands separators
print("Average Daily Volumes:")
for ticker, mean_volume in avg_volumes.items():
    print(f"{ticker}: {mean_volume:,.0f}")

## 8. Risk-Return Analysis

In [None]:
# Annualise the daily mean return and standard deviation using 252 trading days
annual_returns = returns.mean() * 252
annual_volatility = returns.std() * np.sqrt(252)

# Annual risk-free rate subtracted in the Sharpe ratio. 0.0 reproduces the plain
# return / volatility ratio; set it to a real yield to get excess-return Sharpe.
risk_free_rate = 0.0

# Create risk-return scatter plot
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=annual_volatility,
    y=annual_returns,
    mode='markers+text',
    # Labels come from the plotted Series' own index so each marker is
    # guaranteed to carry the ticker its coordinates belong to.
    text=annual_returns.index.tolist(),
    textposition='top center',
    marker=dict(size=12, color='blue'),
    name='Stocks'
))

fig.update_layout(
    title='Risk-Return Profile',
    xaxis_title='Annualized Volatility',
    yaxis_title='Annualized Return',
    template='plotly_white',
    height=500
)

fig.show()

# Display risk-return metrics, best risk-adjusted return first
risk_return_df = pd.DataFrame({
    'Annual Return': annual_returns,
    'Annual Volatility': annual_volatility,
    'Sharpe Ratio': (annual_returns - risk_free_rate) / annual_volatility
}).sort_values('Sharpe Ratio', ascending=False)

print("\nRisk-Return Metrics:")
risk_return_df

## 9. Technical Analysis

In [None]:
# Work on a copy so the indicator columns are not added to stock_data['AAPL']
aapl_data = stock_data['AAPL'].copy()

# Simple moving averages of the close over 20-, 50- and 200-row windows,
# stored as MA_20 / MA_50 / MA_200
for ma_window in (20, 50, 200):
    aapl_data[f'MA_{ma_window}'] = aapl_data['Close'].rolling(window=ma_window).mean()

# Calculate RSI
def calculate_rsi(prices, window=14):
    """Relative Strength Index based on simple rolling means of gains and losses.

    Args:
        prices: pandas Series of prices in chronological order.
        window: number of consecutive price changes averaged per RSI value.

    Returns:
        pandas Series aligned with ``prices``. The first ``window`` entries are
        NaN because that many price changes are not available yet. A window
        without any loss yields 100; a completely flat window yields NaN.
    """
    delta = prices.diff()
    # clip() preserves the leading NaN produced by diff(), so the first RSI
    # value averages `window` real changes instead of counting the missing
    # first change as a zero move.
    gain = delta.clip(lower=0).rolling(window=window).mean()
    loss = delta.clip(upper=0).abs().rolling(window=window).mean()
    rs = gain / loss
    return 100 - (100 / (1 + rs))

aapl_data['RSI'] = calculate_rsi(aapl_data['Close'])

# Three stacked panels: price (60% of the height), volume and RSI (20% each)
fig = make_subplots(
    rows=3,
    cols=1,
    subplot_titles=('Price with Moving Averages', 'Volume', 'RSI'),
    vertical_spacing=0.1,
    row_heights=[0.6, 0.2, 0.2],
)

# Top panel: close price overlaid with its moving averages
# (column name, legend label, line colour)
price_lines = [
    ('Close', 'Close', 'black'),
    ('MA_20', 'MA 20', 'orange'),
    ('MA_50', 'MA 50', 'blue'),
    ('MA_200', 'MA 200', 'red'),
]
for column, label, colour in price_lines:
    fig.add_trace(
        go.Scatter(x=aapl_data.index, y=aapl_data[column], name=label, line=dict(color=colour)),
        row=1, col=1,
    )

# Middle panel: traded volume
fig.add_trace(
    go.Bar(x=aapl_data.index, y=aapl_data['Volume'], name='Volume', marker_color='lightblue'),
    row=2, col=1,
)

# Bottom panel: RSI with dashed reference lines at 70 (red) and 30 (green)
fig.add_trace(
    go.Scatter(x=aapl_data.index, y=aapl_data['RSI'], name='RSI', line=dict(color='purple')),
    row=3, col=1,
)
for level, colour in ((70, "red"), (30, "green")):
    fig.add_hline(y=level, line_dash="dash", line_color=colour, row=3, col=1)

fig.update_layout(
    title='AAPL Technical Analysis',
    template='plotly_white',
    height=800,
)

fig.show()

## 10. Key Insights and Conclusions

In [None]:
# Text summary of the analysis: period covered, rankings by total return,
# volatility and Sharpe ratio, followed by headline insights.
print("=== FINANCIAL DATA ANALYSIS SUMMARY ===")
print(f"\nAnalysis Period: {period}")
print(f"Number of Stocks: {len(tickers)}")
print(f"Total Trading Days: {len(closing_prices)}")

print("\n=== PERFORMANCE RANKINGS ===")
# Total return over the whole window, already expressed in percent
total_returns = (closing_prices.iloc[-1] / closing_prices.iloc[0] - 1) * 100
performance_ranking = total_returns.sort_values(ascending=False)

print("\nTotal Returns Ranking:")
for i, (ticker, return_pct) in enumerate(performance_ranking.items(), 1):
    print(f"{i}. {ticker}: {return_pct:.2f}%")

print("\n=== RISK ANALYSIS ===")
volatility_ranking = annual_volatility.sort_values()
print("\nVolatility Ranking (Low to High):")
for i, (ticker, vol) in enumerate(volatility_ranking.items(), 1):
    # annual_volatility is a fraction (0.30 == 30%), so use the '%' format
    # type, which multiplies by 100 before appending the percent sign.
    print(f"{i}. {ticker}: {vol:.2%}")

print("\n=== SHARPE RATIO RANKING ===")
sharpe_ranking = risk_return_df['Sharpe Ratio'].sort_values(ascending=False)
print("\nSharpe Ratio Ranking:")
for i, (ticker, sharpe) in enumerate(sharpe_ranking.items(), 1):
    print(f"{i}. {ticker}: {sharpe:.3f}")

print("\n=== KEY INSIGHTS ===")
# '+' in the format spec emits the correct sign even when the best total
# return is negative (a literal "+" prefix would print "+-x.xx%").
print(f"• Best Performer: {performance_ranking.index[0]} ({performance_ranking.iloc[0]:+.2f}%)")
print(f"• Worst Performer: {performance_ranking.index[-1]} ({performance_ranking.iloc[-1]:.2f}%)")
print(f"• Least Volatile: {volatility_ranking.index[0]} ({volatility_ranking.iloc[0]:.2%} volatility)")
print(f"• Most Volatile: {volatility_ranking.index[-1]} ({volatility_ranking.iloc[-1]:.2%} volatility)")
print(f"• Best Risk-Adjusted Return: {sharpe_ranking.index[0]} (Sharpe: {sharpe_ranking.iloc[0]:.3f})")
# Mean of the strictly-upper-triangle entries, i.e. each distinct pair counted once
print(f"• Average Correlation: {correlation_matrix.values[np.triu_indices_from(correlation_matrix.values, k=1)].mean():.3f}")