In [None]:
# Summary statistics visualization
# Builds a 2x2 dashboard from two objects created in other cells:
#   returns - daily strategy returns (date-indexed; it is resampled by month below)
#   stats   - mapping read with the keys 'Annual Return', 'Annual Volatility',
#             'Sharpe Ratio' and 'Win Rate'
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
fig.suptitle('Strategy Performance Summary', fontsize=16, fontweight='bold')

# Distribution of returns
axes[0, 0].hist(returns.values, bins=50, alpha=0.7, color='steelblue', edgecolor='black')
axes[0, 0].axvline(returns.mean(), color='red', linestyle='--', linewidth=2, label=f'Mean: {returns.mean():.4f}')
axes[0, 0].set_title('Distribution of Daily Returns')
axes[0, 0].set_xlabel('Daily Return')
axes[0, 0].set_ylabel('Frequency')
axes[0, 0].legend()

# Rolling Sharpe ratio
# 60-row rolling mean over rolling std, scaled by sqrt(252); the first 59
# points are NaN because the window is not yet full.
rolling_sharpe = (returns.rolling(60).mean() / returns.rolling(60).std()) * np.sqrt(252)
axes[0, 1].plot(rolling_sharpe.index, rolling_sharpe.values, linewidth=2, color='purple')
axes[0, 1].fill_between(rolling_sharpe.index, rolling_sharpe.values, alpha=0.3, color='purple')
axes[0, 1].set_title('Rolling 60-Day Sharpe Ratio')
axes[0, 1].set_ylabel('Sharpe Ratio')
axes[0, 1].grid(True, alpha=0.3)

# Return vs risk by month
# Aggregate mean and std per month in a single pass. Returning a list from
# resample(...).apply() produces an object Series of lists, whose 1-D .values
# cannot be turned into a two-column DataFrame, so the columns are built
# explicitly from the aggregated frame instead.
monthly_agg = returns.resample('M').agg(['mean', 'std'])
monthly_stats = pd.DataFrame({
    'Return': monthly_agg['mean'] * 252,
    'Volatility': monthly_agg['std'] * np.sqrt(252),
})
axes[1, 0].scatter(monthly_stats['Volatility'], monthly_stats['Return'], s=100, alpha=0.6, color='teal')
axes[1, 0].set_title('Risk vs Return by Month')
axes[1, 0].set_xlabel('Volatility (Annualized)')
axes[1, 0].set_ylabel('Return (Annualized)')
axes[1, 0].grid(True, alpha=0.3)

# Performance metrics bar chart
metrics = ['Annual Return', 'Annual Volatility', 'Sharpe Ratio', 'Win Rate']
values = [stats['Annual Return'], stats['Annual Volatility'], stats['Sharpe Ratio'], stats['Win Rate']]
colors = ['green', 'red', 'blue', 'orange']
axes[1, 1].bar(metrics, values, color=colors, alpha=0.7, edgecolor='black')
axes[1, 1].set_title('Key Performance Metrics')
axes[1, 1].set_ylabel('Value')
# Annotate each bar with its value, placed slightly above the bar top.
for i, v in enumerate(values):
    axes[1, 1].text(i, v + 0.01, f'{v:.2f}', ha='center', fontweight='bold')

plt.tight_layout()
plt.show()

print("\n" + "="*60)
print("ANALYSIS COMPLETE")
print("="*60)
print(f"Total analysis period: {returns.index[0].date()} to {returns.index[-1].date()}")
print(f"Number of trading days: {len(returns)}")
# NOTE(review): this counts days with a strictly positive return; no benchmark
# is involved, so "outperformed" here means "return > 0".
print(f"Strategy outperformed: {(returns > 0).sum()} out of {len(returns)} days ({(returns > 0).sum()/len(returns)*100:.1f}%)")

## 7. Visualize Results and Insights

Summary visualization of key findings and strategy performance.

In [None]:
# Daily returns and the compounded growth curve derived from portfolio values
returns = portfolio_values.pct_change().dropna()
cumulative_returns = (1 + returns).cumprod()

# Drawdown: fractional distance of the growth curve below its running peak
running_max = cumulative_returns.expanding().max()
drawdown = (cumulative_returns - running_max) / running_max

# Three stacked panels: daily returns, cumulative return, drawdown
fig, axes = plt.subplots(3, 1, figsize=(14, 12))
daily_ax, cumulative_ax, drawdown_ax = axes

# Panel 1 - daily returns
daily_ax.bar(returns.index, returns.values, alpha=0.6, color='steelblue', width=1)
daily_ax.set_title('Daily Strategy Returns', fontsize=12, fontweight='bold')
daily_ax.set_ylabel('Daily Return')
daily_ax.axhline(y=0, color='black', linestyle='-', linewidth=0.5)

# Panel 2 - cumulative returns, expressed in percent
cumulative_pct = (cumulative_returns - 1) * 100
cumulative_ax.plot(cumulative_returns.index, cumulative_pct, linewidth=2, color='darkgreen')
cumulative_ax.fill_between(cumulative_returns.index, cumulative_pct, alpha=0.3, color='green')
cumulative_ax.set_title('Cumulative Returns', fontsize=12, fontweight='bold')
cumulative_ax.set_ylabel('Cumulative Return (%)')

# Panel 3 - drawdown, expressed in percent
drawdown_pct = drawdown.values * 100
drawdown_ax.fill_between(drawdown.index, drawdown_pct, alpha=0.6, color='red')
drawdown_ax.plot(drawdown.index, drawdown_pct, linewidth=1, color='darkred')
drawdown_ax.set_title('Portfolio Drawdown', fontsize=12, fontweight='bold')
drawdown_ax.set_ylabel('Drawdown (%)')
drawdown_ax.set_xlabel('Date')

# Same light grid on every panel
for panel in axes:
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Monthly compounded returns (printed as a table, not drawn as a heatmap)
monthly_returns = returns.resample('M').apply(lambda month: (1 + month).prod() - 1)
print("\nMonthly Returns:")
print(monthly_returns)

## 6. Performance Metrics and Attribution

Analyze returns, drawdowns, and risk metrics in detail.

In [None]:
# Configure and execute the backtest
print("Running backtest...")
backtest_settings = dict(
    price_data=price_data,
    factor_signals=combined_factors,
    initial_capital=100000,
    rebalance_freq='M',  # rebalance once a month
    top_n=5,             # hold the five top-ranked stocks
)
backtest = Backtest(**backtest_settings)

portfolio_values = backtest.run()
print("✓ Backtest complete")

# Summary statistics reported by the backtest object
stats = backtest.get_statistics()
print_performance_summary(stats)

# Equity curve: portfolio value over time
fig, ax = plt.subplots(figsize=(14, 6))
value_dates = portfolio_values.index
value_series = portfolio_values.values
ax.plot(value_dates, value_series, linewidth=2, color='darkblue')
ax.fill_between(value_dates, value_series, alpha=0.3)
ax.set_title('Portfolio Value Over Time', fontsize=14, fontweight='bold')
ax.set_xlabel('Date')
ax.set_ylabel('Portfolio Value ($)')
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

## 5. Backtest Strategy Performance

Run the complete backtest with monthly rebalancing.

In [None]:
# Weight assigned to each factor in the composite score
weights = {'momentum': 0.4, 'volatility': 0.3, 'rsi': 0.3}

# Normalized factor frames, keyed by the same names used in `weights`
factors_dict = {
    'momentum': momentum_norm,
    'volatility': volatility_norm,
    'rsi': rsi_norm,
}

combined_factors = combine_factors(factors_dict, weights)

divider = "=" * 60

print(divider)
print("PORTFOLIO COMPOSITION WEIGHTS")
print(divider)
for factor_name in weights:
    factor_weight = weights[factor_name]
    print(f"{factor_name:.<30} {factor_weight:.1%}")

print("\n" + divider)
print("COMPOSITE FACTOR SCORES (Latest 5 dates)")
print(divider)
print(combined_factors.iloc[-5:, :5])

# Composite score through time for the first five tickers
fig, ax = plt.subplots(figsize=(14, 6))
score_dates = combined_factors.index
for symbol in tickers[:5]:
    ax.plot(score_dates, combined_factors[symbol], label=symbol, alpha=0.7)

ax.set_title('Composite Factor Scores Over Time', fontsize=14, fontweight='bold')
ax.set_xlabel('Date')
ax.set_ylabel('Composite Factor Score')
ax.legend()
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

## 4. Portfolio Construction

Combine factors with custom weights and visualize composite signals.

In [None]:
# Histograms of each normalized factor (row-wise mean taken with axis=1)
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
fig.suptitle('Factor Distributions (Normalized)', fontsize=16, fontweight='bold')

# One entry per panel, listed in row-major order to match axes.flat
histogram_panels = [
    (momentum_norm, 'Momentum Distribution', 'Normalized Momentum', 'blue'),
    (volatility_norm, 'Volatility Distribution (Inverted)', 'Normalized Volatility', 'green'),
    (rsi_norm, 'RSI Distribution', 'Normalized RSI', 'red'),
    (mean_reversion_norm, 'Mean Reversion Distribution', 'Normalized Mean Reversion', 'purple'),
]
for panel_ax, (factor_frame, panel_title, panel_xlabel, panel_color) in zip(axes.flat, histogram_panels):
    factor_frame.mean(axis=1).hist(ax=panel_ax, bins=30, alpha=0.7, color=panel_color)
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel(panel_xlabel)

plt.tight_layout()
plt.show()

# Annotated heatmap of the factor correlation matrix
fig, ax = plt.subplots(figsize=(8, 6))
heatmap_options = dict(
    annot=True,
    cmap='coolwarm',
    center=0,
    square=True,
    ax=ax,
    cbar_kws={'label': 'Correlation'},
)
sns.heatmap(corr_matrix, **heatmap_options)
ax.set_title('Factor Correlation Matrix', fontsize=14, fontweight='bold')
plt.tight_layout()
plt.show()

In [None]:
# Normalize every raw factor; volatility is negated first so that lower
# volatility maps to a higher score.
momentum_norm = normalize_factors(momentum)
volatility_norm = normalize_factors(-volatility)
rsi_norm = normalize_factors(rsi)
mean_reversion_norm = normalize_factors(mean_reversion)

# Display labels paired with their normalized frames (order drives the output)
normalized_by_label = {
    'Momentum': momentum_norm,
    'Volatility': volatility_norm,
    'RSI': rsi_norm,
    'Mean Reversion': mean_reversion_norm,
}

rule = "=" * 60

# Headline statistics for each factor
print(rule)
print("FACTOR STATISTICS")
print(rule)

for label, frame in normalized_by_label.items():
    print(f"\n{label}:")
    print(f"  Mean:     {frame.mean().mean():>10.4f}")
    print(f"  Std Dev:  {frame.std().mean():>10.4f}")
    print(f"  Min:      {frame.min().min():>10.4f}")
    print(f"  Max:      {frame.max().max():>10.4f}")

# Correlation between factors, computed from the last row of each frame
print("\n" + rule)
print("FACTOR CORRELATIONS (Latest date)")
print(rule)

latest_factors = pd.DataFrame(
    {label: frame.iloc[-1] for label, frame in normalized_by_label.items()}
)

corr_matrix = latest_factors.corr()
print(corr_matrix)

## 3. Factor Analysis and Correlation

Analyze factor distributions and correlations to understand relationships between factors.

In [None]:
# Factor calculator bound to the downloaded price data
factor_calc = FactorCalculator(price_data)

# Compute the four raw factors
print("Calculating factors...")
momentum = factor_calc.calculate_momentum(lookback=20)
volatility = factor_calc.calculate_volatility(lookback=20)
rsi = factor_calc.calculate_rsi(lookback=14)
mean_reversion = factor_calc.calculate_mean_reversion(lookback=20)

for completed in ("Momentum", "Volatility", "RSI", "Mean reversion"):
    print(f"✓ {completed} factor calculated")

# Preview: last five rows and first five columns of two of the factors
for preview_label, preview_frame in (("Momentum", momentum), ("Volatility", volatility)):
    print(f"\n{preview_label} factor (last 5 rows):")
    print(preview_frame.iloc[-5:, :5])

## 2. Calculate Financial Factors

Compute multiple quantitative factors: momentum, volatility, RSI, and mean reversion.

In [None]:
# Define parameters
# Universe of tickers and the date range requested from the data loader.
# 'META' is used rather than 'FB': Meta Platforms changed its ticker in
# June 2022, which falls inside the requested window, so the retired symbol
# would not resolve to a full price history.
tickers = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA', 'META', 'NVDA', 'JPM', 'JNJ', 'V']
start_date = '2022-01-01'
end_date = '2024-01-01'

print(f"Downloading data for {len(tickers)} stocks from {start_date} to {end_date}...")
price_data = download_stock_data(tickers, start_date, end_date)

# Clean data
# Each entry is replaced by its cleaned version. Only values are reassigned
# (no keys added or removed), so iterating over the container while doing so
# is safe. Presumably price_data is a dict keyed by ticker - confirm against
# src.data_loader.
for ticker in price_data:
    price_data[ticker] = clean_data(price_data[ticker])

print(f"Successfully downloaded data for {len(price_data)} assets")
print(f"\nData shape for {tickers[0]}: {price_data[tickers[0]].shape}")
print(f"\nFirst few rows of {tickers[0]}:")
# Last expression of the cell: rendered as the cell's rich output.
price_data[tickers[0]].head()

## 1. Import Required Libraries and Load Data

Load historical price data for a basket of stocks using yfinance.

In [None]:
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

# Import custom modules
from src.data_loader import download_stock_data, clean_data
from src.factors import FactorCalculator, normalize_factors, combine_factors
from src.backtest import Backtest
from src.utils import (
    calculate_returns, calculate_cumulative_returns, 
    calculate_sharpe_ratio, calculate_max_drawdown,
    print_performance_summary, analyze_factor_performance
)

# Notebook-wide display and plotting defaults
# None removes pandas' column-count and line-width limits when printing frames.
for unlimited_option in ('display.max_columns', 'display.width'):
    pd.set_option(unlimited_option, None)
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (14, 6)