# Retail Sales Forecasting - Getting Started

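This notebook walks through the forecasting modules end to end: loading data, cleaning it, engineering features, fitting ARIMA and moving-average models, evaluating their forecasts, and plotting the results.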

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys
sys.path.append('..')

# Import custom modules
from src.data.loader import load_data, get_data_info
from src.data.cleaner import clean_data
from src.features.engineer import engineer_features
from src.models.arima import ARIMAForecaster
from src.models.moving_average import MovingAverageForecaster
from src.utils.metrics import calculate_all_metrics, print_metrics

## 1. Load and Explore Data

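Place your CSV file in the `data/raw/` folder and update the path in the cell below. Until you do, the cell generates synthetic daily sales data so the rest of the notebook can run.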

In [None]:
# To use real data, point load_data at your own file instead:
# df = load_data('../data/raw/your_sales_data.csv')

# Synthetic stand-in: one year of daily sales built from a seeded random walk
# offset by 100, plus a sine wave with a 30-day period and amplitude 20.
np.random.seed(42)  # fixed seed so every run produces the same series
n_days = 365
dates = pd.date_range('2023-01-01', periods=n_days, freq='D')
random_walk = np.cumsum(np.random.randn(n_days))
seasonality = 20 * np.sin(np.arange(n_days) * 2 * np.pi / 30)
sales = 100 + random_walk + seasonality
df = pd.DataFrame({'date': dates, 'sales': sales})

# Preview the first rows, then report the frame's dimensions.
print(df.head())
print(f"\nShape: {df.shape}")

## 2. Clean Data

In [None]:
# Pass the raw frame through the project's cleaning routine, naming the column
# that holds the dates, and keep the result under a new name so `df` is untouched.
date_col = 'date'
df_clean = clean_data(df, date_column=date_col)
print(f"Cleaned data shape: {df_clean.shape}")

## 3. Feature Engineering

In [None]:
# Build the feature frame from the cleaned data, telling the helper which
# columns hold the dates and the value being forecast.
feature_kwargs = {'date_column': 'date', 'target_column': 'sales'}
df_features = engineer_features(df_clean, **feature_kwargs)
print(f"Features created: {list(df_features.columns)}")

## 4. Train Models

In [None]:
# Ordered hold-out (no shuffling): the first 80% of rows train the models and
# the remaining rows are kept back for evaluation.
train_fraction = 0.8
train_size = int(len(df_clean) * train_fraction)

sales_series = df_clean['sales']
train_data = sales_series[:train_size]
test_data = sales_series[train_size:]

print(f"Train size: {len(train_data)}")
print(f"Test size: {len(test_data)}")

In [None]:
# Both models are asked for one forecast step per held-out observation.
forecast_horizon = len(test_data)

# ARIMA model, order (2, 1, 2), fitted on the training slice
arima = ARIMAForecaster(order=(2, 1, 2))
arima.fit(train_data)
arima_predictions = arima.predict(steps=forecast_horizon)

# Moving-average model: 'simple' method with a window of 7
ma = MovingAverageForecaster(window=7, method='simple')
ma.fit(train_data)
ma_predictions = ma.predict(steps=forecast_horizon)

## 5. Evaluate Models

In [None]:
# Score the ARIMA forecast against the held-out test values.
arima_metrics = calculate_all_metrics(test_data.values, arima_predictions.values)
print_metrics(arima_metrics, 'ARIMA')

# Note: MA predicts constant value, so metrics will differ.
# Only the first forecast value is used, repeated across the test horizon.
# It is built as a NumPy array rather than a Python list so that both models
# hand the metrics helper the same array type.
ma_pred_values = np.full(len(test_data), ma_predictions.iloc[0])
ma_metrics = calculate_all_metrics(test_data.values, ma_pred_values)
print_metrics(ma_metrics, 'Moving Average')

## 6. Visualize Results

In [None]:
# Plot the full actual series, mark where the test period begins, and overlay
# each model's forecast on the test period so the fits can be compared by eye.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df_clean['date'], df_clean['sales'], label='Actual', alpha=0.7)

# Dates of the held-out rows. Assumes each forecast has one value per test row,
# the same assumption the evaluation cell makes when pairing forecasts with
# test_data.
test_dates = df_clean['date'].iloc[train_size:]
ax.plot(test_dates, arima_predictions.values, label='ARIMA forecast')
# The moving-average forecast is drawn flat: its first value repeated, matching
# how it is scored in the evaluation cell.
ax.plot(test_dates, np.full(len(test_dates), ma_predictions.iloc[0]),
        linestyle=':', label='Moving Average forecast')

ax.axvline(x=df_clean['date'].iloc[train_size], color='r', linestyle='--', label='Train/Test Split')
ax.set_title('Retail Sales Forecasting')
ax.set_xlabel('Date')
ax.set_ylabel('Sales')
ax.legend()
fig.tight_layout()
plt.show()