In [7]:
import polars as pl
import warnings
warnings.filterwarnings("ignore")

# Read the three raw CSV extracts into polars DataFrames.
customers = pl.read_csv('data/raw/customers.csv')
products = pl.read_csv('data/raw/products.csv')
transactions = pl.read_csv('data/raw/transactions.csv')

# Row count of each frame, keyed by the label shown in the overview.
row_counts = {
    "Customers": customers.height,
    "Products": products.height,
    "Transactions": transactions.height,
}

print("Dataset Overview:")
for label, n_rows in row_counts.items():
    print(f"{label}: {n_rows:,}")

# Revenue is the sum of the `total_amount` column over every transaction row.
total_revenue = transactions['total_amount'].sum()
print(f"\nTotal Revenue: ${total_revenue:,.2f}")

# Bare last expression so the notebook renders the first customer rows.
customers.head()

Dataset Overview:
Customers: 5,000
Products: 500
Transactions: 100,000

Total Revenue: $78,301,633.64


customer_id,name,email,city,state,country,join_date
str,str,str,str,str,str,str
"""C00001""","""Robert Johnston""","""gilespaul@example.org""","""Lake Ricky""","""Oregon""","""India""","""2025-02-17"""
"""C00002""","""Mr. Benjamin Li""","""kellerjake@example.org""","""Lake Amy""","""Massachusetts""","""India""","""2024-04-27"""
"""C00003""","""Sarah Guzman""","""christinawalls@example.net""","""Lake Paul""","""Rhode Island""","""India""","""2025-09-18"""
"""C00004""","""Andrew Young""","""alambert@example.net""","""East Marvinfort""","""Illinois""","""India""","""2025-09-28"""
"""C00005""","""Johnny Mueller""","""samuel66@example.org""","""Justinland""","""Louisiana""","""India""","""2023-06-30"""


In [8]:
# Product summary: number of products, products per category (most populated
# category first), and the minimum / maximum / mean of the `price` column.
print(f"Products: {len(products):,}")

# Product analysis
print("\n Products by Category:")
# pl.len() counts the rows in each group. Calling pl.count() with no arguments
# is deprecated in recent polars releases; the alias keeps the output column
# named 'count', so the sort key and the printed table stay the same.
category_counts = (
    products
    .group_by('category')
    .agg(pl.len().alias('count'))
    .sort('count', descending=True)
)
print(category_counts)

print("\n Price Range:")
# Two decimals on every figure so min and max are formatted like the average.
print(f"Min: ${products['price'].min():.2f}")
print(f"Max: ${products['price'].max():.2f}")
print(f"Average: ${products['price'].mean():.2f}")

Products: 500

 Products by Category:
shape: (7, 2)
┌────────────────┬───────┐
│ category       ┆ count │
│ ---            ┆ ---   │
│ str            ┆ u32   │
╞════════════════╪═══════╡
│ Books          ┆ 82    │
│ Home & Kitchen ┆ 75    │
│ Sports         ┆ 73    │
│ Toys           ┆ 72    │
│ Electronics    ┆ 71    │
│ Beauty         ┆ 70    │
│ Clothing       ┆ 57    │
└────────────────┴───────┘

 Price Range:
Min: $10.83
Max: $499.79
Average: $261.21


In [11]:
# Transaction summary: row count, total and mean of `total_amount`, and the
# number of transactions per payment method (most used method first).
print(f"Transactions: {len(transactions):,}")

print("Transaction Analysis:")
print(f"\nTotal Revenue: ${transactions['total_amount'].sum():,.2f}")
# NOTE(review): this is the mean of `total_amount` per transaction row; it is an
# "order value" only if each row is one whole order — TODO confirm.
print(f"Average Order Value: ${transactions['total_amount'].mean():.2f}")
print("\nPayment Methods:")
# pl.len() counts the rows in each group. Calling pl.count() with no arguments
# is deprecated in recent polars releases; the alias keeps the output column
# named 'count', so the sort key and the printed table stay the same.
payment_method_counts = (
    transactions
    .group_by('payment_method')
    .agg(pl.len().alias('count'))
    .sort('count', descending=True)
)
print(payment_method_counts)

Transcations: 100,000
Transaction Analysis:

Total Revenue: $78,301,633.64
Average Order Value: $783.02

Payment Methods:
shape: (4, 2)
┌────────────────┬───────┐
│ payment_method ┆ count │
│ ---            ┆ ---   │
│ str            ┆ u32   │
╞════════════════╪═══════╡
│ Debit Card     ┆ 25131 │
│ Credit Card    ┆ 25097 │
│ Cash           ┆ 24989 │
│ UPI            ┆ 24783 │
└────────────────┴───────┘
