# Results Analysis and Visualization

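This notebook analyzes model results and creates visualizations. The results table and the model comparison read the saved evaluation results; the forecast, residual, error, weekly, and business-metric sections currently run on simulated demonstration data.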

## Contents:
1. Load model results
2. Forecast visualization
3. Residual analysis
4. Model comparison
5. Error distribution
6. Performance by time period
7. Business insights
8. Recommendations

In [None]:
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import sys

sys.path.append(str(Path.cwd().parent))

from src.visualization import (
    plot_forecast,
    plot_residuals,
    plot_model_comparison,
    plot_seasonal_decomposition
)

sns.set_style("whitegrid")
%matplotlib inline

## 1. Load Results

In [None]:
# Look for the evaluation summary under ../models, relative to the current
# working directory (presumably written by the training pipeline, per the
# message printed below when it is missing).
models_dir = Path.cwd().parent / "models"
results_path = models_dir / "evaluation_results.csv"

if not results_path.exists():
    # Leave an explicit None so the model-comparison cell can skip itself.
    print("No results file found. Run the training pipeline first.")
    results_df = None
else:
    # The first CSV column is used as the row index.
    results_df = pd.read_csv(results_path, index_col=0)
    print("Model Performance Results:")
    print(results_df.to_string())

## 2. Forecast Visualization

In [None]:
# Demonstration only: the series below are synthetic.
# A real analysis would load forecasts produced by the saved models instead.

n_days = 42  # six weeks of daily observations
dates = pd.date_range(start='2023-11-01', periods=n_days, freq='D')

# Seed the global NumPy RNG so the simulated series is reproducible.
# The two normal() draws below must stay in this order to keep the same values.
np.random.seed(42)

# Actuals: constant level of 100, a 7-day sinusoidal cycle, and Gaussian noise.
weekly_cycle = 10 * np.sin(np.arange(n_days) * 2 * np.pi / 7)
actual_noise = np.random.normal(0, 5, n_days)
actuals = 100 + weekly_cycle + actual_noise

# Forecast: the actuals perturbed by a second, smaller Gaussian noise term.
forecast_noise = np.random.normal(0, 3, n_days)
forecast = actuals + forecast_noise

plot_forecast(
    title='6-Week Forecast vs Actual',
    dates=dates,
    actuals=actuals,
    forecasts=forecast,
)

## 3. Residual Analysis

In [None]:
# Residual = actual minus forecast, element by element, so a positive value
# means the forecast came in below the actual.
residuals = np.subtract(actuals, forecast)

plot_residuals(title='Residual Analysis', dates=dates, residuals=residuals)

## 4. Model Comparison

In [None]:
# Compare models on each error metric, one horizontal bar chart per metric.
# The whole cell is skipped when no evaluation results were loaded
# (results_df is None).
if results_df is not None:
    metrics = ['mae', 'rmse', 'mape', 'smape']

    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    # axes.flat walks the 2x2 grid row by row, so metrics fill the panels
    # left-to-right, top-to-bottom.
    for ax, metric in zip(axes.flat, metrics):
        if metric not in results_df.columns:
            # Hide the panel instead of leaving a blank, unlabeled axes that
            # looks like a plot that failed to render.
            ax.set_visible(False)
            continue

        # Sort ascending by the metric so bars appear in ranked order.
        sorted_results = results_df.sort_values(metric)
        ax.barh(sorted_results.index, sorted_results[metric], color='steelblue')
        ax.set_xlabel(metric.upper())
        ax.set_title(f'Model Comparison by {metric.upper()}')
        ax.grid(True, alpha=0.3, axis='x')

    plt.tight_layout()
    plt.show()

## 5. Error Distribution

In [None]:
# Signed forecast errors, in raw units and as a percentage of the actual value.
errors = actuals - forecast
percentage_errors = (errors / actuals) * 100

# One entry per histogram panel, left to right:
# (values, title, x-axis label, extra keyword arguments for hist).
error_panels = [
    (errors,
     'Distribution of Forecast Errors',
     'Error (Actual - Forecast)',
     {}),
    (percentage_errors,
     'Distribution of Percentage Errors',
     'Percentage Error (%)',
     {'color': 'coral'}),
]

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

for ax, (values, panel_title, x_label, hist_kwargs) in zip(axes, error_panels):
    ax.hist(values, bins=20, edgecolor='black', alpha=0.7, **hist_kwargs)
    # Dashed red line marks zero error for reference.
    ax.axvline(x=0, color='r', linestyle='--', linewidth=2)
    ax.set_title(panel_title)
    ax.set_xlabel(x_label)
    ax.set_ylabel('Frequency')
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Summary statistics for the same two error series.
print(f"Mean Error: {errors.mean():.2f}")
print(f"Std Error: {errors.std():.2f}")
print(f"Mean Absolute Error: {np.abs(errors).mean():.2f}")
print(f"Mean Percentage Error: {percentage_errors.mean():.2f}%")

## 6. Performance by Time Period

In [None]:
# Build a per-day frame and bucket rows into consecutive 7-row weeks.
# The week number comes from row position (rows 0-6 -> week 1, and so on),
# not from the calendar week of the date.
df_analysis = pd.DataFrame({
    'date': dates,
    'actual': actuals,
    'forecast': forecast,
    'error': errors,
    'abs_error': np.abs(errors),
}).assign(week=lambda frame: (frame.index // 7) + 1)

# Per week: mean of the absolute error, and standard deviation of the signed
# error (the 'error' column of the result therefore holds a std, not a mean).
weekly_aggregations = {'abs_error': 'mean', 'error': 'std'}
weekly_performance = df_analysis.groupby('week').agg(weekly_aggregations)

# One entry per bar-chart panel, left to right:
# (column, title, y-axis label, extra keyword arguments for bar).
weekly_panels = [
    ('abs_error', 'Mean Absolute Error by Week', 'MAE', {}),
    ('error', 'Error Standard Deviation by Week', 'Std Dev', {'color': 'coral'}),
]

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

for ax, (column, panel_title, y_label, bar_kwargs) in zip(axes, weekly_panels):
    ax.bar(weekly_performance.index, weekly_performance[column], **bar_kwargs)
    ax.set_title(panel_title)
    ax.set_xlabel('Week')
    ax.set_ylabel(y_label)
    ax.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.show()

## 7. Business Insights

In [None]:
# Totals over the whole horizon, and the forecast's overall over-/under-shoot
# expressed as a percentage of the actual total (positive = over-forecast).
total_actual = actuals.sum()
total_forecast = forecast.sum()
bias = ((total_forecast - total_actual) / total_actual) * 100

# Assemble the report first, then print it line by line.
banner = "=" * 50
report_lines = [
    banner,
    "BUSINESS METRICS",
    banner,
    f"Total Actual Sales: ${total_actual:,.2f}",
    f"Total Forecasted Sales: ${total_forecast:,.2f}",
    f"Forecast Bias: {bias:+.2f}%",
    f"\nAverage Daily Sales: ${actuals.mean():,.2f}",
    f"Sales Volatility (Std): ${actuals.std():,.2f}",
    # MAPE here is the mean of the absolute percentage errors.
    f"\nForecast Accuracy (MAPE): {np.mean(np.abs(percentage_errors)):.2f}%",
    f"Forecast Precision (Std of Errors): ${errors.std():.2f}",
]

for line in report_lines:
    print(line)

## 8. Recommendations

### Model Performance:
- Deploy the best-performing model to production
- Monitor forecast accuracy weekly
- Retrain models monthly with new data

### Business Actions:
- Use forecasts for inventory planning
- Adjust staffing based on predicted demand
- Plan promotions during low-demand periods
- Set safety stock levels based on forecast uncertainty

### Model Improvements:
- Incorporate external factors (weather, events)
- Add store-specific models for better accuracy
- Implement online learning for concept drift
- Develop confidence intervals for risk management