In [3]:
import numpy as np
import pandas as pd

In [7]:
# Load the dataset
data = pd.read_csv('Online retail.csv',header=None)

In [8]:
data.head()

Unnamed: 0,0
0,"shrimp,almonds,avocado,vegetables mix,green gr..."
1,"burgers,meatballs,eggs"
2,chutney
3,"turkey,avocado"
4,"mineral water,milk,energy bar,whole wheat rice..."


In [9]:
data.dropna(inplace=True)

In [11]:
data.columns=['Grocery']

In [12]:
# Split the grocery list into separate items and create a list of transactions
transactions = data['Grocery'].apply(lambda x: x.split(','))

In [13]:
transactions

0       [shrimp, almonds, avocado, vegetables mix, gre...
1                              [burgers, meatballs, eggs]
2                                               [chutney]
3                                       [turkey, avocado]
4       [mineral water, milk, energy bar, whole wheat ...
                              ...                        
7496                    [butter, light mayo, fresh bread]
7497    [burgers, frozen vegetables, eggs, french frie...
7498                                            [chicken]
7499                                [escalope, green tea]
7500    [eggs, frozen smoothie, yogurt cake, low fat y...
Name: Grocery, Length: 7501, dtype: object

In [14]:
# Create a one-hot encoded DataFrame where each column represents a product
from mlxtend.preprocessing import TransactionEncoder

In [15]:
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
basket = pd.DataFrame(te_ary, columns=te.columns_)

In [16]:
# Display the first few rows of the one-hot encoded basket
basket.head()

Unnamed: 0,asparagus,almonds,antioxydant juice,asparagus.1,avocado,babies food,bacon,barbecue sauce,black tea,blueberries,...,turkey,vegetables mix,water spray,white wine,whole weat flour,whole wheat pasta,whole wheat rice,yams,yogurt cake,zucchini
0,False,True,True,False,True,False,False,False,False,False,...,False,True,False,False,True,False,False,True,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,True,False,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,False


## Association Rule Mining

In [17]:
from mlxtend.frequent_patterns import apriori, association_rules

In [31]:
# Apply the apriori algorithm with a minimum support threshold
frequent_itemsets = apriori(basket, min_support=0.001, use_colnames=True)

In [32]:
# Generate the association rules with a minimum confidence threshold
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.3)

In [33]:
# Display the rules
rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(almonds),(eggs),0.020397,0.179709,0.006532,0.320261,1.782108,0.002867,1.206774,0.448005
1,(almonds),(mineral water),0.020397,0.238368,0.007599,0.372549,1.562914,0.002737,1.213851,0.367669
2,(asparagus),(mineral water),0.004666,0.238368,0.002133,0.457143,1.917801,0.001021,1.403006,0.480813
3,(avocado),(mineral water),0.033329,0.238368,0.011598,0.348,1.459926,0.003654,1.168147,0.325896
4,(babies food),(chocolate),0.004533,0.163845,0.001733,0.382353,2.333628,0.00099,1.353775,0.574085


## Analysis and Interpretation

In [34]:
# Filter rules by setting thresholds for support, confidence, and lift
filtered_rules = rules[(rules['support'] >= 0.001) & (rules['confidence'] >= 0.3) & (rules['lift'] >= 0.2)]

In [35]:
# Sort the rules by lift in descending order
filtered_rules = filtered_rules.sort_values(by='lift', ascending=False)

In [36]:
# Display the filtered and sorted rules
filtered_rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
4091,"(french fries, pasta)","(mushroom cream sauce, escalope)",0.003066,0.005733,0.001067,0.347826,60.67543,0.001049,1.524543,0.986544
4062,"(mineral water, pasta)","(eggs, shrimp)",0.002133,0.014131,0.001333,0.625,44.227594,0.001303,2.628983,0.979479
4089,"(french fries, escalope, pasta)",(mushroom cream sauce),0.0016,0.019064,0.001067,0.666667,34.969697,0.001036,2.942808,0.97296
4087,"(mushroom cream sauce, french fries, escalope)",(pasta),0.002,0.015731,0.001067,0.533333,33.902825,0.001035,2.109147,0.972449
1603,"(fresh tuna, honey)",(fromage blanc),0.003999,0.013598,0.0016,0.4,29.415686,0.001545,1.644003,0.969884


In [37]:
# Interpretation of the results
for index, rule in filtered_rules.iterrows():
    print(f"Rule: {rule['antecedents']} -> {rule['consequents']}")
    print(f"Support: {rule['support']}, Confidence: {rule['confidence']}, Lift: {rule['lift']}")
    print("-" * 20)

See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


Rule: frozenset({'french fries', 'pasta'}) -> frozenset({'mushroom cream sauce', 'escalope'})
Support: 0.0010665244634048793, Confidence: 0.3478260869565217, Lift: 60.67542972699696
--------------------
Rule: frozenset({'mineral water', 'pasta'}) -> frozenset({'eggs', 'shrimp'})
Support: 0.0013331555792560992, Confidence: 0.625, Lift: 44.22759433962264
--------------------
Rule: frozenset({'french fries', 'escalope', 'pasta'}) -> frozenset({'mushroom cream sauce'})
Support: 0.0010665244634048793, Confidence: 0.6666666666666666, Lift: 34.96969696969697
--------------------
Rule: frozenset({'mushroom cream sauce', 'french fries', 'escalope'}) -> frozenset({'pasta'})
Support: 0.0010665244634048793, Confidence: 0.5333333333333332, Lift: 33.90282485875705
--------------------
Rule: frozenset({'fresh tuna', 'honey'}) -> frozenset({'fromage blanc'})
Support: 0.001599786695107319, Confidence: 0.39999999999999997, Lift: 29.415686274509802
--------------------
Rule: frozenset({'mushroom cream sa