In [48]:
# Load necessary packages
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [49]:
# Read the raw basket data. header=None keeps the first spreadsheet row as data
# instead of consuming it as column names, so the single column is labelled 0.
df = pd.read_excel(
    'Online retail.xlsx',
    sheet_name='Sheet1',
    header=None,
)

## Data Preprocessing

In [50]:
# Peek at the leading rows and the overall (rows, columns) dimensions to
# understand how the raw sheet is laid out.
print(df.head(), df.shape, sep='\n')

                                                   0
0  shrimp,almonds,avocado,vegetables mix,green gr...
1                             burgers,meatballs,eggs
2                                            chutney
3                                     turkey,avocado
4  mineral water,milk,energy bar,whole wheat rice...
(7501, 1)


In [51]:
# Convert the single column of comma-separated items into a list of transactions.
# Each cell is split on commas, then cleaned:
#   - surrounding whitespace is trimmed so items differing only by stray spaces
#     are counted as the same product;
#   - empty fragments (from trailing or doubled commas) are discarded;
#   - non-string cells (e.g. NaN from a blank spreadsheet row) become an empty
#     basket instead of raising AttributeError on .split().
# The result stays a Series aligned with df, so the row count is unchanged.
transactions = df[0].apply(
    lambda cell: [item.strip() for item in cell.split(',') if item.strip()]
    if isinstance(cell, str)
    else []
)

In [52]:
# Show the first five parsed baskets to confirm the split worked as expected
print(transactions.iloc[:5])

0    [shrimp, almonds, avocado, vegetables mix, gre...
1                           [burgers, meatballs, eggs]
2                                            [chutney]
3                                    [turkey, avocado]
4    [mineral water, milk, energy bar, whole wheat ...
Name: 0, dtype: object


In [53]:
# One entry per input row: the count should match the row count of df
print((len(transactions),))

(7501,)


## Implementing the Apriori Algorithm

In [54]:
# One-hot encode the baskets: one row per transaction, one column per distinct
# item (column labels come from the encoder's learned item list).
te = TransactionEncoder()
te_ary = te.fit_transform(transactions)
df_trans = pd.DataFrame(data=te_ary, columns=te.columns_)

In [55]:
# Mine itemsets whose support is at least 0.005 (0.5% of transactions).
# use_colnames=True reports item names rather than column indices.
frequent_itemsets = apriori(
    df_trans,
    min_support=0.005,
    use_colnames=True,
)

In [56]:
# Preview the frequent itemsets found by Apriori
print(frequent_itemsets.head())

# `rules` must be bound on both paths: the analysis cell further down reads
# `rules.empty`, which would raise NameError if the else-branch never ran.
if frequent_itemsets.empty:
    print("No frequent itemsets found. Try lowering the min_support value.")
    rules = pd.DataFrame()
else:
    # Generate association rules, keeping those with confidence >= 0.2
    rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.2)

    # Display the resulting rules
    print(rules.head())

    support             itemsets
0  0.020397            (almonds)
1  0.008932  (antioxydant juice)
2  0.033329            (avocado)
3  0.008666              (bacon)
4  0.010799     (barbecue sauce)
  antecedents  consequents  antecedent support  consequent support   support  \
0   (almonds)    (burgers)            0.020397            0.087188  0.005199   
1   (almonds)  (chocolate)            0.020397            0.163845  0.005999   
2   (almonds)       (eggs)            0.020397            0.179709  0.006532   
3   (almonds)  (green tea)            0.020397            0.132116  0.005066   
4   (almonds)       (milk)            0.020397            0.129583  0.005199   

   confidence      lift  leverage  conviction  zhangs_metric  
0    0.254902  2.923577  0.003421    1.225089       0.671653  
1    0.294118  1.795099  0.002657    1.184553       0.452150  
2    0.320261  1.782108  0.002867    1.206774       0.448005  
3    0.248366  1.879913  0.002371    1.154663       0.477806  
4    0

## Analysis and Interpretation
- Analyze the generated rules
- Identify interesting patterns and relationships
- Provide insights into customer purchasing behavior

In [57]:
if not rules.empty:
    # Rank rules by lift, strongest association first. Rebinding (instead of
    # inplace=True) still leaves `rules` sorted for any later cell.
    rules = rules.sort_values('lift', ascending=False)

    # Take the top 10 once and reuse it for both the table and the narrative
    top_rules = rules.head(10)
    print(top_rules)

    # Describe each rule in plain language
    for _, rule in top_rules.iterrows():
        # antecedents/consequents are frozensets, whose iteration order for
        # strings varies between interpreter runs; sorting keeps the printed
        # text reproducible under Restart & Run All.
        antecedents = ', '.join(sorted(rule['antecedents']))
        consequents = ', '.join(sorted(rule['consequents']))
        print(f"Rule: If a customer buys {antecedents}, they are likely to also buy {consequents}")
        print(f" - Support: {rule['support']}")
        print(f" - Confidence: {rule['confidence']}")
        print(f" - Lift: {rule['lift']}")
        print()
else:
    print("No association rules found.")

                        antecedents          consequents  antecedent support  \
108                         (pasta)           (escalope)            0.015731   
208                         (pasta)             (shrimp)            0.015731   
205             (whole wheat pasta)          (olive oil)            0.029463   
521      (herb & pepper, spaghetti)        (ground beef)            0.016264   
519  (mineral water, herb & pepper)        (ground beef)            0.017064   
152                  (tomato sauce)        (ground beef)            0.014131   
107          (mushroom cream sauce)           (escalope)            0.019064   
504  (frozen vegetables, spaghetti)           (tomatoes)            0.027863   
577           (mineral water, soup)          (olive oil)            0.023064   
503           (tomatoes, spaghetti)  (frozen vegetables)            0.020931   

     consequent support   support  confidence      lift  leverage  conviction  \
108            0.079323  0.005866    0