In [2]:
# Install mlxtend
!pip install mlxtend --quiet

import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# Association-rule mining restricted to the most frequently purchased UK items.
# Tunable values are named here instead of being buried in the calls below.
DATA_PATH = "/content/Online Retail.xlsx"
TOP_N_ITEMS = 50     # number of most frequent item descriptions to keep
MIN_SUPPORT = 0.02   # minimum itemset support passed to apriori
MIN_LIFT = 1.0       # minimum lift a rule needs to be kept

# Load data
df = pd.read_excel(DATA_PATH)

# Clean data: a row without an invoice number or a description cannot be
# assigned to a basket, so drop it (reassign rather than mutate in place).
df = df.dropna(subset=["InvoiceNo", "Description"])
df = df[~df["InvoiceNo"].astype(str).str.startswith("C")]  # remove canceled

# Focus only on UK transactions to reduce size
df = df[df['Country'] == 'United Kingdom']

# Filter to top N most frequent items to save memory.
# NOTE: invoices that contain none of these items disappear entirely here,
# so the support values computed below are fractions of the remaining
# invoices, not of all UK invoices.
top_items = df['Description'].value_counts().nlargest(TOP_N_ITEMS).index
df = df[df['Description'].isin(top_items)]

# Group by Invoice: one set of distinct item descriptions per invoice
transactions = df.groupby('InvoiceNo')['Description'].apply(set).tolist()

# Encode with TransactionEncoder (already imported at the top of this cell):
# one row per invoice, one boolean column per item.
te = TransactionEncoder()
te_data = te.fit_transform(transactions)
df_encoded = pd.DataFrame(te_data, columns=te.columns_)

# Run Apriori with a moderate support threshold
frequent_itemsets = apriori(df_encoded, min_support=MIN_SUPPORT, use_colnames=True)

# Generate rules, strongest association (highest lift) first
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=MIN_LIFT)
rules = rules.sort_values(by="lift", ascending=False)

# Display top 5
rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']].head()


Unnamed: 0,antecedents,consequents,support,confidence,lift
275,(GREEN REGENCY TEACUP AND SAUCER),"(REGENCY CAKESTAND 3 TIER, ROSES REGENCY TEACU...",0.027944,0.384368,10.876607
270,"(REGENCY CAKESTAND 3 TIER, ROSES REGENCY TEACU...",(GREEN REGENCY TEACUP AND SAUCER),0.027944,0.790749,10.876607
472,"(JUMBO BAG PINK POLKADOT, JUMBO SHOPPER VINTAG...","(JUMBO STORAGE BAG SUKI, JUMBO BAG RED RETROSPOT)",0.02055,0.581498,10.702725
477,"(JUMBO STORAGE BAG SUKI, JUMBO BAG RED RETROSPOT)","(JUMBO BAG PINK POLKADOT, JUMBO SHOPPER VINTAG...",0.02055,0.378223,10.702725
271,"(REGENCY CAKESTAND 3 TIER, GREEN REGENCY TEACU...",(ROSES REGENCY TEACUP AND SAUCER ),0.027944,0.790749,10.615205


In [3]:
def recommend(item, rules_df, top_n=3):
    """Print the highest-confidence recommendations for ``item``.

    Rules whose antecedents contain ``item`` are ranked by confidence
    (highest first) and the consequents of the best ``top_n`` are printed
    together with their confidence and lift.

    Args:
        item: Item to look up. It is matched with a plain membership test
            against each antecedent collection, so it must equal the stored
            item exactly (including case and any surrounding whitespace).
        rules_df: DataFrame with 'antecedents', 'consequents', 'confidence'
            and 'lift' columns; antecedents and consequents are collections
            of items.
        top_n: Maximum number of rules to print.

    Returns:
        None. All output goes to stdout.
    """
    # astype(bool) keeps the mask boolean even when rules_df has no rows;
    # otherwise an empty non-boolean mask would be read as a column selection.
    has_item = rules_df['antecedents'].apply(lambda antecedent: item in antecedent).astype(bool)
    matches = rules_df[has_item]

    # Say so explicitly instead of silently printing nothing.
    if matches.empty:
        print(f"No recommendations found for {item!r}")
        return

    results = matches.sort_values(by='confidence', ascending=False).head(top_n)
    for consequents, confidence, lift in zip(results['consequents'], results['confidence'], results['lift']):
        print(f"Recommend {list(consequents)} (Confidence: {confidence:.2f}, Lift: {lift:.2f})")

# Example: look up suggestions for a single product using the mined rules
recommend(item="WHITE HANGING HEART T-LIGHT HOLDER", rules_df=rules)


Recommend ['JUMBO BAG RED RETROSPOT'] (Confidence: 0.20, Lift: 1.34)
Recommend ['NATURAL SLATE HEART CHALKBOARD '] (Confidence: 0.19, Lift: 1.99)
Recommend ['WOODEN PICTURE FRAME WHITE FINISH'] (Confidence: 0.19, Lift: 2.23)


In [4]:
# Spell out the five leading rules (in the current order of `rules`) as sentences
top_rules = rules.head(5)
rule_fields = zip(
    top_rules['antecedents'],
    top_rules['consequents'],
    top_rules['confidence'],
    top_rules['lift'],
)
for lhs, rhs, conf_value, lift_value in rule_fields:
    print(f"If a user buys {list(lhs)}, they are likely to buy {list(rhs)} "
          f"with confidence {conf_value:.2f} and lift {lift_value:.2f}")


If a user buys ['GREEN REGENCY TEACUP AND SAUCER'], they are likely to buy ['REGENCY CAKESTAND 3 TIER', 'ROSES REGENCY TEACUP AND SAUCER '] with confidence 0.38 and lift 10.88
If a user buys ['REGENCY CAKESTAND 3 TIER', 'ROSES REGENCY TEACUP AND SAUCER '], they are likely to buy ['GREEN REGENCY TEACUP AND SAUCER'] with confidence 0.79 and lift 10.88
If a user buys ['JUMBO BAG PINK POLKADOT', 'JUMBO SHOPPER VINTAGE RED PAISLEY'], they are likely to buy ['JUMBO STORAGE BAG SUKI', 'JUMBO BAG RED RETROSPOT'] with confidence 0.58 and lift 10.70
If a user buys ['JUMBO STORAGE BAG SUKI', 'JUMBO BAG RED RETROSPOT'], they are likely to buy ['JUMBO BAG PINK POLKADOT', 'JUMBO SHOPPER VINTAGE RED PAISLEY'] with confidence 0.38 and lift 10.70
If a user buys ['REGENCY CAKESTAND 3 TIER', 'GREEN REGENCY TEACUP AND SAUCER'], they are likely to buy ['ROSES REGENCY TEACUP AND SAUCER '] with confidence 0.79 and lift 10.62


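The following cell runs the same pipeline without the top-50 item filter: every item is kept, but the data is still restricted to non-cancelled United Kingdom invoices.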

In [6]:
# Install necessary library
!pip install mlxtend --quiet

# Import required packages
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# Read the raw export and discard rows that lack an invoice number or a description
df = pd.read_excel("/content/Online Retail.xlsx")
df = df.dropna(subset=["InvoiceNo", "Description"])

# Keep United Kingdom rows whose invoice number does not start with "C"
# (the "C" prefix is treated as a cancelled invoice)
is_cancelled = df["InvoiceNo"].astype(str).str.startswith("C")
is_uk = df["Country"] == "United Kingdom"
df = df[~is_cancelled & is_uk]

# Build one list of item descriptions per invoice; every entry is coerced to str
descriptions_by_invoice = df.groupby(['InvoiceNo'])['Description']
basket = descriptions_by_invoice.apply(lambda items: list(map(str, items))).tolist()

# One-hot encode the baskets: one row per invoice, one column per item
te = TransactionEncoder()
te_data = te.fit(basket).transform(basket)
df_encoded = pd.DataFrame(te_data, columns=te.columns_)

# Mine frequent itemsets
frequent_items = apriori(df_encoded, min_support=0.02, use_colnames=True)

# Derive rules with lift >= 1.0 and order them by descending lift
rules = association_rules(frequent_items, metric="lift", min_threshold=1.0)
rules = rules.sort_values('lift', ascending=False)

# Show the first five rules, limited to the columns of interest
summary_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
rules.head()[summary_columns]


Unnamed: 0,antecedents,consequents,support,confidence,lift
168,(PINK REGENCY TEACUP AND SAUCER),"(ROSES REGENCY TEACUP AND SAUCER , GREEN REGEN...",0.026409,0.70128,18.675463
165,"(ROSES REGENCY TEACUP AND SAUCER , GREEN REGEN...",(PINK REGENCY TEACUP AND SAUCER),0.026409,0.703281,18.675463
167,(GREEN REGENCY TEACUP AND SAUCER),"(PINK REGENCY TEACUP AND SAUCER, ROSES REGENCY...",0.026409,0.527837,18.047007
166,"(PINK REGENCY TEACUP AND SAUCER, ROSES REGENCY...",(GREEN REGENCY TEACUP AND SAUCER),0.026409,0.90293,18.047007
164,"(PINK REGENCY TEACUP AND SAUCER, GREEN REGENCY...",(ROSES REGENCY TEACUP AND SAUCER ),0.026409,0.854419,16.666982
