In [1]:
# ----------------------------------------
# Environment & Path Setup
# ----------------------------------------

import sys
from pathlib import Path

# Parent of the current working directory. The value therefore depends on
# where the kernel was started.
# NOTE(review): presumably this is the folder that holds the `scripts`
# package imported by the database cell -- confirm the launch directory.
python_dir = Path().resolve().parent

# Keep the directory at the front of sys.path, but only insert it when it is
# not already there: re-running this cell must not stack duplicate entries.
if sys.path[:1] != [str(python_dir)]:
    sys.path.insert(0, str(python_dir))

print("Python path configured:", python_dir)


Python path configured: D:\projects\online-shop-2024-analysis\python


In [2]:
# ----------------------------------------
# Database Connection
# ----------------------------------------
# Creates the single `engine` object that every later query cell passes to
# pd.read_sql.

import pandas as pd
# Project-local helper; this import only resolves if the directory containing
# the `scripts` package is on sys.path (the path-setup cell must run first).
from scripts.db_connection import get_mysql_engine

# NOTE(review): presumably returns a SQLAlchemy engine for the MySQL
# database -- the helper's source is not visible here, confirm. No query is
# issued in this cell, so the message below does not by itself prove that the
# database is reachable.
engine = get_mysql_engine()
print("Database engine ready")


Database engine ready


In [4]:
# Sanity check: number of customer_region rows per region.
# The query has no ORDER BY, so the row order of the result is whatever the
# database returns.
_region_count_sql = """
SELECT region, COUNT(*) AS customer_count
FROM customer_region
GROUP BY region
"""
region_check = pd.read_sql(sql=_region_count_sql, con=engine)

region_check


Unnamed: 0,region,customer_count
0,North,4890
1,East,3290
2,South,1540
3,West,280


In [5]:
# Sanity check: total revenue per region, computed in SQL by joining each
# fact_sales row to customer_region on customer_id and summing revenue.
_region_revenue_sql = """
SELECT
    cr.region,
    SUM(fs.revenue) AS total_revenue
FROM fact_sales fs
JOIN customer_region cr
    ON fs.customer_id = cr.customer_id
GROUP BY cr.region
"""
region_sales_check = pd.read_sql(sql=_region_revenue_sql, con=engine)

region_sales_check


Unnamed: 0,region,total_revenue
0,North,16823939.27
1,East,11259547.98
2,South,4985027.89
3,West,954610.58


In [3]:
# ----------------------------------------
# Load Core Tables
# ----------------------------------------


def _load_table(table_name):
    """Read every row and column of `table_name` into a DataFrame via `engine`."""
    return pd.read_sql(f"SELECT * FROM {table_name}", engine)


# One full-table read per statement, in the same order as before: the fact
# table first, then the three dimension tables.
fact_sales = _load_table("fact_sales")
dim_customer = _load_table("dim_customer")
dim_product = _load_table("dim_product")
dim_date = _load_table("dim_date")

# Preview the first rows of the fact table as the cell output.
fact_sales.head()


Unnamed: 0,order_id,customer_id,product_id,date_id,quantity,revenue
0,1,8002,1896,2024-10-10,4.0,165.18
1,2,5097,534,2024-08-25,10.0,8314.5
2,3,4670,1792,2024-05-21,6.0,1775.76
3,4,3875,1548,2024-06-05,11.0,3916.47
4,5,5507,1270,2024-10-29,11.0,1478.19
