# 05 — Pareto & Contribution Tiers (A/B/C)

This notebook builds Pareto-style contribution curves and assigns A/B/C tiers for customers and products based on cumulative revenue contribution. This is a common commercial segmentation technique.

In [1]:
"""Repository bootstrap for notebooks and ad-hoc execution.

This module ensures the repository root (the directory containing 'src/') is added
to sys.path so that imports like `from src...` work reliably when running
notebooks from the /notebooks folder (or any subfolder inside the repo).
"""
import bootstrap


In [2]:
import sys
from pathlib import Path


In [3]:
import pandas as pd
import numpy as np

from src.io import load_sample, project_root
from src import quality, kpi_metrics as metrics
from src.viz import line_chart, barh_top

# Read every sample table once, then bind the five frames this notebook refers to.
dfs = load_sample()
(
    fact,
    dim_customers,
    dim_products,
    report_customers,
    report_products,
) = (
    dfs[table_key]
    for table_key in (
        "fact_sales",
        "dim_customers",
        "dim_products",
        "report_customers",
        "report_products",
    )
)

# Preview the first rows of the sales fact table.
fact.head()


Unnamed: 0,order_number,product_key,customer_key,order_date,shipping_date,due_date,sales_amount,quantity,price
0,SO54496,282,5400,2013-03-16,2013-03-23,2013-03-28,25,1,25
1,SO54496,289,5400,2013-03-16,2013-03-23,2013-03-28,5,1,5
2,SO54496,259,5400,2013-03-16,2013-03-23,2013-03-28,2,1,2
3,SO54497,174,9281,2013-03-16,2013-03-23,2013-03-28,22,1,22
4,SO54497,280,9281,2013-03-16,2013-03-23,2013-03-28,9,1,9


In [4]:
# Schema validation: stop early if any input table lost a required column.
# NOTE(review): only the fact/dim tables are checked here; the report_* tables
# used for the Pareto curves are not validated in this cell — confirm whether
# REQUIRED has entries for them.
from src.core.schema_checks import assert_required_columns, REQUIRED

for _table_name, _frame in (
    ("fact_sales", fact),
    ("dim_customers", dim_customers),
    ("dim_products", dim_products),
):
    assert_required_columns(_frame, REQUIRED[_table_name], _table_name)

In [5]:
def pareto_table(df, name_col, value_col):
    """Build a Pareto (cumulative contribution) table for one value column.

    Rows with a missing value in either ``name_col`` or ``value_col`` are
    dropped, the remainder is ordered from largest to smallest ``value_col``,
    and two columns are appended:

    * ``cum_revenue`` — running total of ``value_col`` in that order.
    * ``cum_share``   — ``cum_revenue`` divided by the total of ``value_col``
      over the retained rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Source table; must contain ``name_col`` and ``value_col``.
    name_col : str
        Column identifying each entity (e.g. customer or product name).
    value_col : str
        Numeric column to rank and accumulate.

    Returns
    -------
    pandas.DataFrame
        New frame with columns ``[name_col, value_col, "cum_revenue",
        "cum_share"]``, keeping the original index labels. The input frame is
        not modified.

    Raises
    ------
    KeyError
        If ``name_col`` or ``value_col`` is not a column of ``df``.
    """
    t = (
        df[[name_col, value_col]]
        .dropna()
        # A stable sort keeps tied values in their input order, so the
        # cumulative share (and any tier derived from it) of equal-revenue
        # rows does not depend on the sort algorithm's internals.
        .sort_values(value_col, ascending=False, kind="stable")
    )
    t["cum_revenue"] = t[value_col].cumsum()

    total = t[value_col].sum()
    if total == 0:
        # No revenue to apportion (empty or all-zero input): the share is
        # undefined, so report NaN uniformly instead of a 0/0 or x/0 result.
        t["cum_share"] = float("nan")
    else:
        t["cum_share"] = t["cum_revenue"] / total
    return t

# Rank customers, then products, by their total sales contribution.
cust_pareto = pareto_table(
    df=report_customers,
    name_col="customer_name",
    value_col="total_sales",
)
prod_pareto = pareto_table(
    df=report_products,
    name_col="product_name",
    value_col="total_sales",
)

# Show the largest contributors on the customer side.
cust_pareto.head()


Unnamed: 0,customer_name,total_sales,cum_revenue,cum_share
1088,Kaitlyn Henderson,13294,13294,0.000493
1257,Nichole Nara,13294,26588,0.000986
1264,Margaret He,13268,39856,0.001478
1087,Randall Dominguez,13265,53121,0.00197
1256,Adriana Gonzalez,13242,66363,0.002461


In [6]:
# Assign tiers based on cumulative share
def assign_tier(cum_share, a_cutoff=0.8, b_cutoff=0.95):
    """Map a cumulative share to an A/B/C contribution tier.

    Parameters
    ----------
    cum_share : float
        Cumulative share of the total for one row of a Pareto table.
    a_cutoff : float, default 0.8
        Inclusive upper bound of tier "A".
    b_cutoff : float, default 0.95
        Inclusive upper bound of tier "B"; anything above it is tier "C".

    Returns
    -------
    str
        ``"A"`` when ``cum_share <= a_cutoff``, ``"B"`` when
        ``a_cutoff < cum_share <= b_cutoff``, otherwise ``"C"``. A NaN share
        compares false against both cutoffs and therefore lands in ``"C"``.

    Raises
    ------
    ValueError
        If ``a_cutoff`` is greater than ``b_cutoff`` (tier "B" would be
        unreachable).
    """
    if a_cutoff > b_cutoff:
        raise ValueError(
            f"a_cutoff ({a_cutoff}) must not exceed b_cutoff ({b_cutoff})"
        )
    if cum_share <= a_cutoff:
        return "A"
    if cum_share <= b_cutoff:
        return "B"
    return "C"

# Label every row of both Pareto tables with its tier, element by element.
cust_pareto["tier"] = cust_pareto["cum_share"].map(assign_tier)
prod_pareto["tier"] = prod_pareto["cum_share"].map(assign_tier)

# How many customers fall into each tier?
cust_pareto["tier"].value_counts()


tier
C    10572
A     4613
B     2427
Name: count, dtype: int64

In [7]:
# Export tier tables
cust_path = project_root() / "outputs/tables/customer_tiers.csv"
prod_path = project_root() / "outputs/tables/product_tiers.csv"

# DataFrame.to_csv does not create missing folders, so make sure the target
# directory exists; otherwise a fresh checkout fails on Restart & Run All.
cust_path.parent.mkdir(parents=True, exist_ok=True)
prod_path.parent.mkdir(parents=True, exist_ok=True)

# Write only the columns needed downstream; the positional index is dropped.
cust_pareto[[ "customer_name", "total_sales", "cum_share", "tier" ]].to_csv(cust_path, index=False)
prod_pareto[[ "product_name", "total_sales", "cum_share", "tier" ]].to_csv(prod_path, index=False)

# Display where the files were written.
(cust_path, prod_path)


(WindowsPath('C:/Users/uvcha/Python_Data_Analysis_Project_Fortis_Aligned_bootstrap/Python_Data_Analysis_Project_Fortis_Aligned/outputs/tables/customer_tiers.csv'),
 WindowsPath('C:/Users/uvcha/Python_Data_Analysis_Project_Fortis_Aligned_bootstrap/Python_Data_Analysis_Project_Fortis_Aligned/outputs/tables/product_tiers.csv'))