diff --git a/.typos.toml b/.typos.toml
index abd36cef..60cccd8d 100644
--- a/.typos.toml
+++ b/.typos.toml
@@ -50,6 +50,12 @@ preprocessor = "preprocessor"
logits = "logits"
analyse = "analyse"
Labour = "Labour"
+# Forecasting and statistical terms
+MAPE = "MAPE"
+mape = "mape"
+yhat = "yhat"
+yhat_lower = "yhat_lower"
+yhat_upper = "yhat_upper"
[default]
locale = "en-us"
diff --git a/README.md b/README.md
index c7355648..5b0b87b8 100644
--- a/README.md
+++ b/README.md
@@ -71,6 +71,7 @@ etc.
| [Huggingface to Sagemaker](huggingface-sagemaker) | 🚀 MLOps | 🔄 CI/CD, 📦 Deployment | mlflow, sagemaker, kubeflow |
| [Databricks Production QA](databricks-production-qa-demo) | 🚀 MLOps | 📊 Monitoring, 🔍 Quality Assurance | databricks, evidently, shap |
| [Eurorate Predictor](eurorate-predictor) | 📊 Data | ⏱️ Time Series, 🔄 ETL | airflow, bigquery, xgboost |
+| [RetailForecast](retail-forecast) | 📊 Data | ⏱️ Time Series, 📈 Forecasting, 🔮 Multi-Model | prophet, zenml, pandas |
# 💻 System Requirements
diff --git a/retail-forecast/Dockerfile.codespace b/retail-forecast/Dockerfile.codespace
new file mode 100644
index 00000000..df6a27a1
--- /dev/null
+++ b/retail-forecast/Dockerfile.codespace
@@ -0,0 +1,42 @@
+# Sandbox base image
+FROM zenmldocker/zenml-sandbox:latest
+
+# Install uv from official distroless image
+COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
+
+# Set uv environment variables for optimization
+ENV UV_SYSTEM_PYTHON=1
+ENV UV_COMPILE_BYTECODE=1
+
+# Project metadata
+LABEL project_name="retail-forecast"
+LABEL project_version="0.1.0"
+
+# Install dependencies with uv and cache optimization
+RUN --mount=type=cache,target=/root/.cache/uv \
+ uv pip install --system \
+ "zenml>=0.82.0" \
+ "numpy>=1.20.0" \
+ "pandas>=1.3.0" \
+ "matplotlib>=3.5.0" \
+ "prophet>=1.1.0" \
+ "typing_extensions>=4.0.0" \
+ "pyarrow" \
+ "fastparquet" \
+ "plotly" \
+ "notebook"
+
+# Set workspace directory
+WORKDIR /workspace
+
+# Clone only the project directory and reorganize
+RUN git clone --depth 1 https://github.com/zenml-io/zenml-projects.git /tmp/zenml-projects && \
+ cp -r /tmp/zenml-projects/retail-forecast/* /workspace/ && \
+ rm -rf /tmp/zenml-projects
+
+# VSCode settings
+RUN mkdir -p /workspace/.vscode && \
+ printf '{\n "workbench.colorTheme": "Default Dark Modern"\n}' > /workspace/.vscode/settings.json
+
+# Create assets directory for visualizations
+RUN mkdir -p /workspace/assets
\ No newline at end of file
diff --git a/retail-forecast/README.md b/retail-forecast/README.md
new file mode 100644
index 00000000..3915a47f
--- /dev/null
+++ b/retail-forecast/README.md
@@ -0,0 +1,207 @@
+# RetailForecast: Production-Ready Sales Forecasting with ZenML and Prophet
+
+A robust MLOps pipeline for retail sales forecasting, designed for data scientists and ML engineers working with retail data.
+
+## 📊 Business Context
+
+In retail, accurate demand forecasting is critical for optimizing inventory, staff scheduling, and financial planning. This project provides a production-ready sales forecasting solution that can be immediately deployed in retail environments to:
+
+- Predict future sales volumes across multiple stores and products
+- Capture seasonal patterns and trends in customer purchasing behavior
+- Support data-driven inventory management and purchasing decisions
+- Provide actionable insights through visual forecasting dashboards
+
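+Under the hood, each store-item combination is treated as its own time series and modeled with Prophet. The sketch below is illustrative only (synthetic data, arbitrary parameters), not the project's pipeline code; it simply shows the `ds`/`y` input format Prophet expects and the `yhat`/`yhat_lower`/`yhat_upper` outputs the evaluation step consumes:
+
+```python
+import pandas as pd
+from prophet import Prophet
+
+# One store-item series in Prophet's expected format:
+# "ds" = date column, "y" = observed daily sales
+history = pd.DataFrame({
+    "ds": pd.date_range("2024-01-01", periods=90, freq="D"),
+    "y": range(90),  # placeholder values; use real sales here
+})
+
+model = Prophet(weekly_seasonality=True)
+model.fit(history)
+
+# Forecast the next 7 days; yhat is the point forecast,
+# yhat_lower / yhat_upper bound the uncertainty interval
+future = model.make_future_dataframe(periods=7)
+forecast = model.predict(future)
+print(forecast[["ds", "yhat", "yhat_lower", "yhat_upper"]].tail(7))
+```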
+
+
+
+        <div class="header">
+            <h1>Retail Sales Historical Data Analysis</h1>
+            <p>Interactive visualization of sales patterns across stores and products.</p>
+        </div>
+    """)
+
+ # Create overview metrics
+ total_sales = sales_data["sales"].sum()
+ avg_daily_sales = sales_data.groupby("date")["sales"].sum().mean()
+ num_stores = sales_data["store"].nunique()
+ num_items = sales_data["item"].nunique()
+ min_date = sales_data["date"].min().strftime("%Y-%m-%d")
+ max_date = sales_data["date"].max().strftime("%Y-%m-%d")
+ date_range = f"{min_date} to {max_date}"
+
+ html_parts.append(f"""
+    <div class="section">
+        <h2>Dataset Overview</h2>
+        <div class="metrics">
+            <div class="metric">
+                <h3>Total Sales</h3>
+                <p>{total_sales:,.0f} units</p>
+            </div>
+            <div class="metric">
+                <h3>Avg. Daily Sales</h3>
+                <p>{avg_daily_sales:,.1f} units</p>
+            </div>
+            <div class="metric">
+                <h3>Stores × Items</h3>
+                <p>{num_stores} × {num_items}</p>
+            </div>
+            <div class="metric">
+                <h3>Date Range</h3>
+                <p>{date_range}</p>
+            </div>
+        </div>
+    </div>
+ """)
+
+ # 1. Time Series - Overall Sales Trend
+ df_daily = sales_data.groupby("date")["sales"].sum().reset_index()
+ fig_trend = px.line(
+ df_daily,
+ x="date",
+ y="sales",
+ title="Daily Total Sales Across All Stores and Products",
+ template="plotly_white",
+ )
+ fig_trend.update_traces(line=dict(width=2))
+ fig_trend.update_layout(
+ xaxis_title="Date", yaxis_title="Total Sales (units)", height=500
+ )
+ trend_html = fig_trend.to_html(full_html=False, include_plotlyjs="cdn")
+ html_parts.append(f"""
+    <div class="section">
+        <h2>Overall Sales Trend</h2>
+        {trend_html}
+        <div class="insights">
+            <p><strong>Insights:</strong> Observe weekly patterns and special events that impact overall sales volume.</p>
+        </div>
+    </div>
+ """)
+
+ # 2. Store Comparison
+ store_sales = (
+ sales_data.groupby(["date", "store"])["sales"].sum().reset_index()
+ )
+ fig_stores = px.line(
+ store_sales,
+ x="date",
+ y="sales",
+ color="store",
+ title="Sales Comparison by Store",
+ template="plotly_white",
+ )
+ fig_stores.update_layout(
+ xaxis_title="Date", yaxis_title="Total Sales (units)", height=500
+ )
+ stores_html = fig_stores.to_html(full_html=False, include_plotlyjs="cdn")
+ html_parts.append(f"""
+    <div class="section">
+        <h2>Store Comparison</h2>
+        {stores_html}
+        <div class="insights">
+            <p><strong>Insights:</strong> Compare performance across different stores to identify top performers and potential issues.</p>
+        </div>
+    </div>
+ """)
+
+ # 3. Product Performance
+ item_sales = (
+ sales_data.groupby(["date", "item"])["sales"].sum().reset_index()
+ )
+ fig_items = px.line(
+ item_sales,
+ x="date",
+ y="sales",
+ color="item",
+ title="Sales Comparison by Product",
+ template="plotly_white",
+ )
+ fig_items.update_layout(
+ xaxis_title="Date", yaxis_title="Total Sales (units)", height=500
+ )
+ items_html = fig_items.to_html(full_html=False, include_plotlyjs="cdn")
+ html_parts.append(f"""
+    <div class="section">
+        <h2>Product Performance</h2>
+        {items_html}
+        <div class="insights">
+            <p><strong>Insights:</strong> Identify best-selling products and those with unique seasonal patterns.</p>
+        </div>
+    </div>
+ """)
+
+ # 4. Weekly Patterns
+ sales_data["day_of_week"] = sales_data["date"].dt.day_name()
+ day_order = [
+ "Monday",
+ "Tuesday",
+ "Wednesday",
+ "Thursday",
+ "Friday",
+ "Saturday",
+ "Sunday",
+ ]
+ weekly_pattern = (
+ sales_data.groupby("day_of_week")["sales"]
+ .mean()
+ .reindex(day_order)
+ .reset_index()
+ )
+
+ fig_weekly = px.bar(
+ weekly_pattern,
+ x="day_of_week",
+ y="sales",
+ title="Average Sales by Day of Week",
+ template="plotly_white",
+ color="sales",
+ color_continuous_scale="Blues",
+ )
+ fig_weekly.update_layout(
+ xaxis_title="", yaxis_title="Average Sales (units)", height=500
+ )
+ weekly_html = fig_weekly.to_html(full_html=False, include_plotlyjs="cdn")
+ html_parts.append(f"""
+    <div class="section">
+        <h2>Weekly Patterns</h2>
+        {weekly_html}
+        <div class="insights">
+            <p><strong>Insights:</strong> Identify peak sales days to optimize inventory and staffing.</p>
+        </div>
+    </div>
+ """)
+
+ # 5. Sample Store-Item Combinations
+ # Select 3 random series to display
+ sample_series = np.random.choice(
+ series_ids, size=min(3, len(series_ids)), replace=False
+ )
+
+ # Create subplots for train/test visualization
+ fig_samples = make_subplots(
+ rows=len(sample_series),
+ cols=1,
+ subplot_titles=[f"Series: {series_id}" for series_id in sample_series],
+ shared_xaxes=True,
+ vertical_spacing=0.1,
+ )
+
+ for i, series_id in enumerate(sample_series):
+ train_data = train_data_dict[series_id]
+ test_data = test_data_dict[series_id]
+
+ # Add train data
+ fig_samples.add_trace(
+ go.Scatter(
+ x=train_data["ds"],
+ y=train_data["y"],
+ mode="lines+markers",
+ name=f"{series_id} (Training)",
+ line=dict(color="blue"),
+ legendgroup=series_id,
+ showlegend=(i == 0),
+ ),
+ row=i + 1,
+ col=1,
+ )
+
+ # Add test data
+ fig_samples.add_trace(
+ go.Scatter(
+ x=test_data["ds"],
+ y=test_data["y"],
+ mode="lines+markers",
+ name=f"{series_id} (Test)",
+ line=dict(color="green"),
+ legendgroup=series_id,
+ showlegend=(i == 0),
+ ),
+ row=i + 1,
+ col=1,
+ )
+
+ fig_samples.update_layout(
+ height=300 * len(sample_series),
+ title_text="Train/Test Split for Sample Series",
+ template="plotly_white",
+ )
+
+ samples_html = fig_samples.to_html(full_html=False, include_plotlyjs="cdn")
+ html_parts.append(f"""
+    <div class="section">
+        <h2>Sample Series with Train/Test Split</h2>
+        {samples_html}
+        <div class="insights">
+            <p><strong>Insights:</strong> Visualize how historical data is split into training and testing sets for model evaluation.</p>
+        </div>
+    </div>
+ """)
+
+ # Close HTML document
+ html_parts.append("""
+    </body>
+    </html>
+ """)
+
+ # Combine all HTML parts
+ complete_html = "".join(html_parts)
+
+ # Return as HTMLString
+ return HTMLString(complete_html)
diff --git a/retail-forecast/steps/model_evaluator.py b/retail-forecast/steps/model_evaluator.py
new file mode 100644
index 00000000..9fdcc3fa
--- /dev/null
+++ b/retail-forecast/steps/model_evaluator.py
@@ -0,0 +1,303 @@
+import base64
+import logging
+from io import BytesIO
+from typing import Dict, List, Tuple
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from prophet import Prophet
+from typing_extensions import Annotated
+from zenml import log_metadata, step
+from zenml.types import HTMLString
+
+logger = logging.getLogger(__name__)
+
+
+@step
+def evaluate_models(
+ models: Dict[str, Prophet],
+ test_data_dict: Dict[str, pd.DataFrame],
+ series_ids: List[str],
+ forecast_horizon: int = 7,
+) -> Tuple[
+ Annotated[Dict[str, float], "performance_metrics"],
+ Annotated[HTMLString, "evaluation_report"],
+]:
+ """Evaluate Prophet models on test data and log metrics.
+
+ Args:
+ models: Dictionary of trained Prophet models
+ test_data_dict: Dictionary of test data for each series
+ series_ids: List of series identifiers
+ forecast_horizon: Number of future time periods to forecast
+
+ Returns:
+ performance_metrics: Dictionary of average metrics across all series
+ evaluation_report: HTML report with evaluation metrics and visualizations
+ """
+ # Initialize metrics storage
+ all_metrics = {"mae": [], "rmse": [], "mape": []}
+
+ series_metrics = {}
+
+ # Create a figure for plotting forecasts
+ plt.figure(figsize=(12, len(series_ids) * 4))
+
+ for i, series_id in enumerate(series_ids):
+ logger.info(f"Evaluating model for {series_id}...")
+ model = models[series_id]
+ test_data = test_data_dict[series_id]
+
+ # Debug: Check that test data exists
+ logger.info(f"Test data shape for {series_id}: {test_data.shape}")
+ logger.info(
+ f"Test data date range: {test_data['ds'].min()} to {test_data['ds'].max()}"
+ )
+
+ # Create future dataframe starting from the FIRST test date, not from training data
+ future_dates = test_data["ds"].unique()
+ if len(future_dates) == 0:
+            logger.warning(
+                f"No test data dates for {series_id}, skipping evaluation"
+            )
+ continue
+
+ # Make predictions for test dates
+ forecast = model.predict(pd.DataFrame({"ds": future_dates}))
+
+ # Print debug info
+ logger.info(f"Forecast shape: {forecast.shape}")
+ logger.info(
+ f"Forecast date range: {forecast['ds'].min()} to {forecast['ds'].max()}"
+ )
+
+ # Merge forecasts with test data correctly
+ merged_data = pd.merge(
+ test_data,
+ forecast[["ds", "yhat", "yhat_lower", "yhat_upper"]],
+ on="ds",
+ how="inner", # Only keep matching dates
+ )
+
+ logger.info(f"Merged data shape: {merged_data.shape}")
+ if merged_data.empty:
+        logger.warning(
+            f"No matching dates between test data and forecast for {series_id}"
+        )
+ continue
+
+ # Calculate metrics only if we have merged data
+ if len(merged_data) > 0:
+ # Calculate metrics
+ actuals = merged_data["y"].values
+ predictions = merged_data["yhat"].values
+
+ # Debug metrics calculation
+ logger.info(f"Actuals range: {actuals.min()} to {actuals.max()}")
+ logger.info(
+ f"Predictions range: {predictions.min()} to {predictions.max()}"
+ )
+
+ mae = np.mean(np.abs(actuals - predictions))
+ rmse = np.sqrt(np.mean((actuals - predictions) ** 2))
+
+ # Handle zeros in actuals for MAPE calculation
+ mask = actuals != 0
+ if np.any(mask):
+ mape = (
+ np.mean(
+ np.abs(
+ (actuals[mask] - predictions[mask]) / actuals[mask]
+ )
+ )
+ * 100
+ )
+ else:
+ mape = np.nan
+
+ # Store metrics
+ series_metrics[series_id] = {
+ "mae": mae,
+ "rmse": rmse,
+ "mape": mape,
+ }
+
+ all_metrics["mae"].append(mae)
+ all_metrics["rmse"].append(rmse)
+ if not np.isnan(mape):
+ all_metrics["mape"].append(mape)
+
+ logger.info(
+ f"Metrics for {series_id}: MAE={mae:.2f}, RMSE={rmse:.2f}, MAPE={mape:.2f}%"
+ )
+
+ # Plot the forecast vs actual for this series
+ plt.subplot(len(series_ids), 1, i + 1)
+ plt.plot(merged_data["ds"], merged_data["y"], "b.", label="Actual")
+ plt.plot(
+ merged_data["ds"], merged_data["yhat"], "r-", label="Forecast"
+ )
+ plt.fill_between(
+ merged_data["ds"],
+ merged_data["yhat_lower"],
+ merged_data["yhat_upper"],
+ color="gray",
+ alpha=0.2,
+ )
+ plt.title(f"Forecast vs Actual for {series_id}")
+ plt.legend()
+
+ # Calculate average metrics across all series
+ if not all_metrics["mae"]:
+        logger.warning("No valid metrics calculated!")
+ average_metrics = {
+ "avg_mae": np.nan,
+ "avg_rmse": np.nan,
+ "avg_mape": np.nan,
+ }
+ else:
+ average_metrics = {
+ "avg_mae": np.mean(all_metrics["mae"]),
+ "avg_rmse": np.mean(all_metrics["rmse"]),
+ "avg_mape": np.mean(all_metrics["mape"])
+ if all_metrics["mape"]
+ else np.nan,
+ }
+
+ # Save plot to buffer
+ buf = BytesIO()
+ plt.tight_layout()
+ plt.savefig(buf, format="png")
+ buf.seek(0)
+ plot_data = base64.b64encode(buf.read()).decode("utf-8")
+ plt.close()
+
+ # Log metrics to ZenML
+ log_metadata(
+ metadata={
+ "avg_mae": float(average_metrics["avg_mae"])
+ if not np.isnan(average_metrics["avg_mae"])
+ else 0.0,
+ "avg_rmse": float(average_metrics["avg_rmse"])
+ if not np.isnan(average_metrics["avg_rmse"])
+ else 0.0,
+ "avg_mape": float(average_metrics["avg_mape"])
+ if not np.isnan(average_metrics["avg_mape"])
+ else 0.0,
+ }
+ )
+
+ logger.info(f"Final Average MAE: {average_metrics['avg_mae']:.2f}")
+ logger.info(f"Final Average RMSE: {average_metrics['avg_rmse']:.2f}")
+ logger.info(
+ f"Final Average MAPE: {average_metrics['avg_mape']:.2f}%"
+ if not np.isnan(average_metrics["avg_mape"])
+ else "Final Average MAPE: N/A"
+ )
+
+ # Create HTML report
+ html_report = create_evaluation_report(
+ average_metrics, series_metrics, plot_data
+ )
+
+ return average_metrics, html_report
+
+
+def create_evaluation_report(average_metrics, series_metrics, plot_image_data):
+ """Create an HTML report for model evaluation."""
+ # Create a table for series-specific metrics
+ series_rows = ""
+ for series_id, metrics in series_metrics.items():
+ mape_value = (
+ f"{metrics['mape']:.2f}%"
+ if not np.isnan(metrics.get("mape", np.nan))
+ else "N/A"
+ )
+ series_rows += f"""
+