# 04 — Product & Category Performance

This notebook analyses product and category performance using the reporting mart (`report_products_sample`). It identifies which categories and SKUs drive revenue and customer reach.

In [None]:
"""Repository bootstrap for notebooks and ad-hoc execution.

This module ensures the repository root (the directory containing 'src/') is added
to sys.path so that imports like `from src...` work reliably when running
notebooks from the /notebooks folder (or any subfolder inside the repo).
"""
import bootstrap


In [None]:
import sys
from pathlib import Path


In [None]:
import pandas as pd
import numpy as np

from src.io import load_sample, project_root
from src import quality, kpi_metrics as metrics
from src.viz import line_chart, barh_top

# Fetch the sample tables once; the result is indexed by table name below.
dfs = load_sample()

# Fact and dimension tables.
fact, dim_customers, dim_products = (
    dfs["fact_sales"],
    dfs["dim_customers"],
    dfs["dim_products"],
)

# Reporting tables (customer-level and product-level).
report_customers, report_products = dfs["report_customers"], dfs["report_products"]

# Show the first fact rows as this cell's output.
fact.head()


In [None]:
# Schema validation (fail fast if inputs change)
from src.core.schema_checks import assert_required_columns, REQUIRED

assert_required_columns(fact, REQUIRED["fact_sales"], "fact_sales")
assert_required_columns(dim_customers, REQUIRED["dim_customers"], "dim_customers")
assert_required_columns(dim_products, REQUIRED["dim_products"], "dim_products")

# Every analysis cell in this notebook reads `report_products`, so validate it
# as well. The set below is exactly the columns those cells reference; a missing
# column is reported here by name instead of surfacing later as a KeyError.
REPORT_PRODUCTS_COLUMNS = {
    "product_name",
    "category",
    "subcategory",
    "product_segment",
    "total_sales",
    "total_customers",
}
missing_report_cols = sorted(REPORT_PRODUCTS_COLUMNS.difference(report_products.columns))
assert not missing_report_cols, (
    f"report_products is missing required columns: {missing_report_cols}"
)

In [None]:
# Rank products by revenue (highest first) and keep the 20 leading rows;
# `top_p` is reused by the chart and export cells.
top_p = report_products.sort_values(by="total_sales", ascending=False).head(20)

# Display only the descriptive columns for the ten best sellers.
top_p_display_cols = [
    "product_name",
    "category",
    "subcategory",
    "product_segment",
    "total_sales",
    "total_customers",
]
top_p[top_p_display_cols].head(10)


In [None]:
# Render the top-10 products by total sales and save the chart under outputs/figures.
fig_path = project_root() / "outputs/figures/top_products_sales.png"

# Create the target folder up front so the save also works on a fresh checkout
# where the outputs/ tree does not exist yet. Idempotent if it already exists.
# NOTE(review): assumes project_root() returns a pathlib.Path (the `/` join suggests so).
fig_path.parent.mkdir(parents=True, exist_ok=True)

barh_top(top_p, "product_name", "total_sales", "Top Products by Total Sales", top_n=10, path=fig_path)

# Echo the output location as the cell result.
fig_path


In [None]:
# Category summary: total sales per (category, subcategory) pair, largest first.
# dropna=False keeps groups whose category or subcategory is missing.
cat_group_keys = ["category", "subcategory"]
cat = (
    report_products
    .groupby(cat_group_keys, dropna=False)["total_sales"]
    .sum()
    .sort_values(ascending=False)
)

# Show the ten highest-revenue groups.
cat.head(10)


In [None]:
# Export tables: the top-20 product rows and the category/subcategory revenue summary.
top_path = project_root() / "outputs/tables/top_products.csv"
cat_path = project_root() / "outputs/tables/category_summary.csv"

# DataFrame.to_csv does not create missing parent folders, so make sure they
# exist first; otherwise a fresh checkout fails here with an OSError.
# NOTE(review): assumes project_root() returns a pathlib.Path (the `/` join suggests so).
for out_path in (top_path, cat_path):
    out_path.parent.mkdir(parents=True, exist_ok=True)

top_p.to_csv(top_path, index=False)

# `cat` is a Series indexed by (category, subcategory); flatten it to columns and
# publish the value column under the name "revenue".
cat.reset_index().rename(columns={"total_sales": "revenue"}).to_csv(cat_path, index=False)

# Echo both output locations as the cell result.
(top_path, cat_path)
