In [6]:
# ----------------------------------------
# Environment & Path Setup
# ----------------------------------------

import sys
from pathlib import Path

# Add /python directory to Python path
python_dir = Path().resolve().parent
sys.path.insert(0, str(python_dir))

print("Python path set to:", python_dir)


Python path set to: D:\projects\online-shop-2024-analysis\python


In [9]:
# ----------------------------------------
# Database Connection Setup
# ----------------------------------------

import pandas as pd
from scripts.db_connection import get_mysql_engine

# Create MySQL engine
engine = get_mysql_engine()

print("Database engine created successfully")


Database engine created successfully


In [10]:
# Core data handling
import pandas as pd

# Market Basket Analysis libraries
from mlxtend.frequent_patterns import apriori, association_rules


In [11]:
# SQL query to fetch order-wise product data
# Each row represents one product purchased in an order

query = """
SELECT 
    fs.order_id,
    dp.product_name
FROM fact_sales fs
JOIN dim_product dp
    ON fs.product_id = dp.product_id
WHERE fs.quantity > 0
"""

# Load data into pandas DataFrame
basket_df = pd.read_sql(query, engine)

# Preview data
basket_df.head()


Unnamed: 0,order_id,product_name
0,1,4K Monitor
1,2,Air Purifier
2,3,Electric Kettle
3,4,File Cabinet
4,5,Wireless Mouse


In [12]:
# Group data by order and product
# This creates a matrix where:
# Rows = Orders
# Columns = Products
# Values = Count of product in that order

basket = (
    basket_df
    .groupby(['order_id', 'product_name'])
    .size()
    .unstack(fill_value=0)
)

# Convert counts to binary:
# 1 = product purchased
# 0 = product not purchased
basket = basket.applymap(lambda x: 1 if x > 0 else 0)

basket.head()


  basket = basket.applymap(lambda x: 1 if x > 0 else 0)


product_name,4K Monitor,Air Purifier,Bath Towels,Bluetooth Headphones,Bookshelf,Cable Organizer,Coffee Maker,Computer Speakers,Desk Lamp,Desk Mat,...,Toaster Oven,USB-C Hub,Wall Clock,Water Bottle,Webcam HD,WiFi Router,Wireless Charger,Wireless Earbuds,Wireless Mouse,Wireless Presenter
order_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [13]:
# Apply Apriori algorithm
# min_support = 0.02 means:
# Product combination must appear in at least 2% of all orders

frequent_itemsets = apriori(
    basket,
    min_support=0.02,
    use_colnames=True
)

# View most frequent itemsets
frequent_itemsets.sort_values(by="support", ascending=False).head()




Unnamed: 0,support,itemsets
0,0.024583,(4K Monitor)
18,0.0225,(Storage Shelf)
2,0.022417,(Bluetooth Headphones)
9,0.022333,(Kitchen Blender)
20,0.022333,(Throw Pillows)


In [14]:
# Generate association rules using Lift
# Lift > 1 means products are positively associated

rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=1.2
)

# Sort rules by confidence (strength of rule)
rules = rules.sort_values(by="confidence", ascending=False)

rules.head()


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski


In [15]:
# Keep only strong and actionable rules
# confidence >= 60% → strong prediction
# lift >= 1.5 → meaningful association

strong_rules = rules[
    (rules['confidence'] >= 0.6) &
    (rules['lift'] >= 1.5)
]

strong_rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']]


Unnamed: 0,antecedents,consequents,support,confidence,lift
