In [1]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

transactions = [
    ['a', 'b', 'c'],
    ['b', 'd'],
    ['b', 'a', 'd', 'c'],
    ['e', 'd'],
    ['a', 'b', 'c', 'd'],
    ['f'],
]

from mlxtend.preprocessing import TransactionEncoder
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_ary, columns=te.columns_)

min_support = 0.5
min_confidence = 0.7


In [2]:
frequent_itemsets = apriori(df, min_support, use_colnames=True)
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=min_confidence)

print("Frequent Itemsets:")
print(frequent_itemsets)

print("\nAssociation Rules:")
print(rules[['antecedents', 'consequents', 'confidence']])

Frequent Itemsets:
    support   itemsets
0  0.500000        (a)
1  0.666667        (b)
2  0.500000        (c)
3  0.666667        (d)
4  0.500000     (a, b)
5  0.500000     (c, a)
6  0.500000     (c, b)
7  0.500000     (d, b)
8  0.500000  (c, a, b)

Association Rules:
   antecedents consequents  confidence
0          (a)         (b)        1.00
1          (b)         (a)        0.75
2          (c)         (a)        1.00
3          (a)         (c)        1.00
4          (c)         (b)        1.00
5          (b)         (c)        0.75
6          (d)         (b)        0.75
7          (b)         (d)        0.75
8       (c, a)         (b)        1.00
9       (c, b)         (a)        1.00
10      (a, b)         (c)        1.00
11         (c)      (a, b)        1.00
12         (a)      (c, b)        1.00
13         (b)      (c, a)        0.75


In [3]:
from apriori import apriori
from apriori import generate_rules

frequent_itemsets, support_data = apriori(transactions, min_support)
rules = generate_rules(frequent_itemsets, support_data, min_confidence)

print("Frequent Itemsets:")
for itemset in frequent_itemsets:
    print(set(itemset), "=>", f"support: {support_data[itemset]:.2f}")

print("\n\nAssociation Rules:")
for antecedent, consequent, support, confidence in rules:
    print(f"{set(antecedent)} => {set(consequent)} (support: {support:.2f}, confidence: {confidence:.2f})")

Frequent Itemsets:
{'c'} => support: 0.50
{'a'} => support: 0.50
{'b'} => support: 0.67
{'d'} => support: 0.67
{'c', 'a'} => support: 0.50
{'c', 'b'} => support: 0.50
{'d', 'b'} => support: 0.50
{'a', 'b'} => support: 0.50
{'c', 'a', 'b'} => support: 0.50


Association Rules:
{'c'} => {'a'} (support: 0.50, confidence: 1.00)
{'a'} => {'c'} (support: 0.50, confidence: 1.00)
{'c'} => {'b'} (support: 0.50, confidence: 1.00)
{'b'} => {'c'} (support: 0.50, confidence: 0.75)
{'d'} => {'b'} (support: 0.50, confidence: 0.75)
{'b'} => {'d'} (support: 0.50, confidence: 0.75)
{'a'} => {'b'} (support: 0.50, confidence: 1.00)
{'b'} => {'a'} (support: 0.50, confidence: 0.75)
{'c'} => {'a', 'b'} (support: 0.50, confidence: 1.00)
{'a'} => {'c', 'b'} (support: 0.50, confidence: 1.00)
{'b'} => {'c', 'a'} (support: 0.50, confidence: 0.75)
{'c', 'a'} => {'b'} (support: 0.50, confidence: 1.00)
{'c', 'b'} => {'a'} (support: 0.50, confidence: 1.00)
{'a', 'b'} => {'c'} (support: 0.50, confidence: 1.00)
Frequen

In [4]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori as mlxtend_apriori
from mlxtend.frequent_patterns import association_rules as mlxtend_rules

def assert_apriori_equal(transactions, min_support=0.5, min_confidence=0.7):
    te = TransactionEncoder()
    te_ary = te.fit(transactions).transform(transactions)
    df = pd.DataFrame(te_ary, columns=te.columns_)

    mlxtend_frequent = mlxtend_apriori(df, min_support=min_support, use_colnames=True)
    mlxtend_rule_df = mlxtend_rules(mlxtend_frequent, metric="confidence", min_threshold=min_confidence)

    mlxtend_itemsets = {frozenset(x): round(s, 5) for x, s in zip(mlxtend_frequent['itemsets'], mlxtend_frequent['support'])}
    mlxtend_rules_set = set()
    for _, row in mlxtend_rule_df.iterrows():
        mlxtend_rules_set.add((
            frozenset(row['antecedents']),
            frozenset(row['consequents']),
            round(row['support'], 5),
            round(row['confidence'], 5)
        ))

    custom_itemsets, custom_support_data = apriori(transactions, min_support)
    custom_itemsets_dict = {itemset: round(support, 5) for itemset, support in custom_support_data.items()}

    custom_rules = generate_rules(custom_itemsets, custom_support_data, min_confidence)
    custom_rules_set = set(
        (frozenset(ant), frozenset(con), round(sup, 5), round(conf, 5))
        for ant, con, sup, conf in custom_rules
    )

    assert mlxtend_itemsets == custom_itemsets_dict, "Frequent itemsets do not match!"
    assert mlxtend_rules_set == custom_rules_set, "Association rules do not match!"

    print("✅ Custom and mlxtend Apriori implementations produce the same results.")


In [5]:
assert_apriori_equal(transactions, min_support=0.6, min_confidence=0.7)

✅ Custom and mlxtend Apriori implementations produce the same results.


In [24]:
import pandas as pd

df_tmp = pd.read_csv("./groceries.csv", header=None, on_bad_lines='skip')
# print(df1)

tx_list = df_tmp.apply(
    lambda row: [item for item in row if pd.notnull(item) and item != ''],
    axis=1
)
print(tx_list)

te = TransactionEncoder()
te_ary = te.fit(tx_list).transform(tx_list)
df = pd.DataFrame(te_ary, columns=te.columns_)
print(te.columns_)
df.shape

0       [citrus fruit, semi-finished bread, margarine,...
1                        [tropical fruit, yogurt, coffee]
2                                            [whole milk]
3        [pip fruit, yogurt, cream cheese , meat spreads]
4       [other vegetables, whole milk, condensed milk,...
                              ...                        
6101                   [yogurt, long life bakery product]
6102                    [pork, frozen vegetables, pastry]
6103    [ice cream, long life bakery product, specialt...
6104                                  [cooking chocolate]
6105    [semi-finished bread, bottled water, soda, bot...
Length: 6106, dtype: object
['Instant food products', 'UHT-milk', 'abrasive cleaner', 'artif. sweetener', 'baby cosmetics', 'bags', 'baking powder', 'bathroom cleaner', 'beef', 'berries', 'beverages', 'bottled beer', 'bottled water', 'brandy', 'brown bread', 'butter', 'butter milk', 'cake bar', 'candles', 'candy', 'canned beer', 'canned fish', 'canned fruit', 

(6106, 165)

In [32]:
frequent_itemsets = mlxtend_apriori(df, min_support=0.001, use_colnames=True)
rules = mlxtend_rules(frequent_itemsets, metric="confidence", min_threshold=0.3)

print("Frequent Itemsets:")
print(frequent_itemsets)

print("\nAssociation Rules:")
print(rules[['antecedents', 'consequents', 'confidence']])

Frequent Itemsets:
      support                                         itemsets
0    0.003112                          (Instant food products)
1    0.015558                                       (UHT-milk)
2    0.001310                               (artif. sweetener)
3    0.004913                                  (baking powder)
4    0.001146                               (bathroom cleaner)
..        ...                                              ...
511  0.001146     (rolls/buns, fruit/vegetable juice, sausage)
512  0.001146       (rolls/buns, other vegetables, whole milk)
513  0.001310  (whole milk, other vegetables, root vegetables)
514  0.001146             (whole milk, other vegetables, soda)
515  0.003439                      (rolls/buns, sausage, soda)

[516 rows x 2 columns]

Association Rules:
                            antecedents         consequents  confidence
0                              (liquor)      (bottled beer)    0.562500
1                         (canned fis

In [30]:
from apriori import apriori
from apriori import generate_rules

frequent_itemsets, support_data = apriori(tx_list, min_support=0.001)
rules = generate_rules(frequent_itemsets, support_data, min_confidence=0.3)

print("Frequent Itemsets:")
for itemset in frequent_itemsets:
    print(set(itemset), "=>", f"support: {support_data[itemset]:.5f}")

print("\n\nAssociation Rules:")
for antecedent, consequent, support, confidence in rules:
    print(f"{set(antecedent)} => {set(consequent)} (support: {support:.5f}, confidence: {confidence:.5f})")

Frequent Itemsets:
{'popcorn'} => support: 0.00213
{'light bulbs'} => support: 0.00229
{'soups'} => support: 0.00147
{'domestic eggs'} => support: 0.02096
{'sliced cheese'} => support: 0.00573
{'rice'} => support: 0.00115
{'frozen meals'} => support: 0.01572
{'margarine'} => support: 0.02195
{'chicken'} => support: 0.01785
{'curd'} => support: 0.02031
{'yogurt'} => support: 0.06289
{'specialty bar'} => support: 0.02145
{'liquor (appetizer)'} => support: 0.00540
{'pickled vegetables'} => support: 0.00360
{'zwieback'} => support: 0.00409
{'misc. beverages'} => support: 0.01916
{'waffles'} => support: 0.01818
{'prosecco'} => support: 0.00164
{'male cosmetics'} => support: 0.00295
{'whipped/sour cream'} => support: 0.02653
{'specialty chocolate'} => support: 0.02211
{'sausage'} => support: 0.04815
{'cream cheese '} => support: 0.01458
{'tropical fruit'} => support: 0.03980
{'pet care'} => support: 0.00704
{'potato products'} => support: 0.00115
{'flower soil/fertilizer'} => support: 0.0021

In [31]:
assert_apriori_equal(tx_list, min_support=0.001, min_confidence=0.3)

✅ Custom and mlxtend Apriori implementations produce the same results.
