# Association Rule Mining for Product Recommendations

This notebook implements the Association Rule Mining process to find patterns between product categories based on user ratings. The results will be used to generate product suggestions for the e-commerce recommendation system.

### Objectives:
1. Load user ratings and product data.
2. Map products to their corresponding "Main - Sub" categories.
3. Group interactions by user to create "transactions".
4. Apply Association Rule Mining to find significant patterns.
5. Export the rules in the standard format required by the system.

In [None]:
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder
import os

# File Paths (Relative to project root)
# Relative paths resolve against the kernel's working directory, so the
# notebook is expected to be run from the project root.
# Ratings CSV; later cells use its 'productid' and 'user_id' columns.
RATINGS_PATH = 'proceed/new_rating.csv'
# Product CSV; later cells use its 'id', 'main_category' and 'sub_category' columns.
PRODUCTS_PATH = 'proceed/Amazon-Products_processed_03.csv'
# Destination CSV for the exported association rules.
OUTPUT_PATH = 'association_rules/association_rules_1.csv'

### 1. Load and Prepare Data

In [None]:
# Read the ratings and the product table from disk
ratings_df = pd.read_csv(RATINGS_PATH)
products_df = pd.read_csv(PRODUCTS_PATH)

# Label every product with a "Main - Sub" category key; the key is NaN
# whenever either of the two category columns is missing
products_df['category_key'] = (
    products_df['main_category'] + " - " + products_df['sub_category']
)

# Attach the category key to each rating (inner join on the product id) and
# discard rows whose category key could not be built
category_lookup = products_df[['id', 'category_key']]
merged_df = ratings_df.merge(
    category_lookup,
    how='inner',
    left_on='productid',
    right_on='id',
).dropna(subset=['category_key'])

print(f"Total interactions: {len(merged_df)}")

### 2. Group by User (Transactions)

In [None]:
# One transaction per user: the distinct category keys that user has rated
categories_by_user = merged_df.groupby('user_id')['category_key']
transactions = [list(set(keys)) for _, keys in categories_by_user]

print(f"Number of transactions: {len(transactions)}")
lengths = list(map(len, transactions))
print(f"Average unique categories per user: {np.mean(lengths):.2f}")

### 3. One-Hot Encoding

In [None]:
# Learn the set of category keys first, then encode every transaction as one
# row with a column per category key
te = TransactionEncoder()
te.fit(transactions)
te_ary = te.transform(transactions)
df_ohe = pd.DataFrame(data=te_ary, columns=te.columns_)

print(f"One-hot encoded dataframe shape: {df_ohe.shape}")

### 4. Mining with Apriori (Max Length 2)
Itemsets are limited to pairs (`max_len=2`) to avoid a combinatorial explosion in the number of candidate itemsets on this dense data.

In [None]:
# The data is dense (the original analysis notes an average of 46 categories
# per user out of 112), so a fairly high support threshold is used and
# itemsets are capped at two items.
support = 0.4
print(f"Using min_support: {support}")

# Mine frequent itemsets containing at most two categories
frequent_itemsets = apriori(
    df_ohe,
    min_support=support,
    use_colnames=True,
    max_len=2,
)
print(f"Number of frequent itemsets found: {len(frequent_itemsets)}")

# Derive rules only when something frequent was found; otherwise fall back to
# an empty frame so the export cell can still test `rules.empty`
if len(frequent_itemsets) == 0:
    rules = pd.DataFrame()
    print("No frequent itemsets found.")
else:
    rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.0)
    print(f"Number of rules generated: {len(rules)}")

### 5. Format and Export

In [None]:
def format_set(s):
    """Render an itemset as a single comma-separated string.

    Items are sorted before joining so the exported text does not depend on
    the (arbitrary) iteration order of the underlying set.
    """
    return ", ".join(sorted(s))


if not rules.empty:
    # Select the rule columns that are written to the export file
    output_df = rules[[
        'antecedents', 'consequents', 'antecedent support',
        'consequent support', 'support', 'confidence',
        'lift', 'leverage', 'conviction'
    ]].copy()

    # Convert frozensets to strings
    output_df['antecedents'] = output_df['antecedents'].apply(format_set)
    output_df['consequents'] = output_df['consequents'].apply(format_set)

    # Derive the target directory from OUTPUT_PATH (instead of repeating its
    # name) so changing the constant cannot break the export; exist_ok avoids
    # the check-then-create race. dirname is '' when the path has no folder.
    output_dir = os.path.dirname(OUTPUT_PATH)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Export to CSV
    output_df.to_csv(OUTPUT_PATH, index=False)

    print(f"Rules exported successfully to {OUTPUT_PATH}")
    print(output_df.head())
else:
    print("No rules to export.")