# FP-Growth vs Apriori - Frequent Pattern Mining
### Complete Google Colab Notebook
This notebook demonstrates how to perform frequent pattern mining using both FP-Growth and Apriori algorithms and compare their performance.

In [None]:
# Install Required Libraries
!pip install mlxtend

In [None]:
# Import Libraries
import time
import pandas as pd
from mlxtend.frequent_patterns import fpgrowth, apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder

In [None]:
# Define Transactional Dataset
# Seven toy market baskets; each transaction becomes a list of item-name strings.
dataset = [
    basket.split()
    for basket in (
        "milk bread nuts apple",
        "milk bread nuts",
        "milk bread",
        "milk apple",
        "milk bread apple",
        "milk bread apple",
        "bread nuts",
    )
]

In [None]:
# Data Preprocessing
# Encode the list-of-lists transactions into a tabular form: the encoder learns
# the item vocabulary from `dataset`, then transforms the same transactions into
# an array whose columns are labelled with the learned item names.
te = TransactionEncoder()
te.fit(dataset)
te_data = te.transform(dataset)
df = pd.DataFrame(data=te_data, columns=te.columns_)
df

In [None]:
# Run FP-Growth Algorithm
# Timing uses time.perf_counter() instead of time.time(): it is a monotonic,
# high-resolution clock intended for measuring short durations, whereas the
# wall clock can be coarse or be adjusted while the cell runs.
# start_fp / end_fp are only meaningful as a difference (end_fp - start_fp).
start_fp = time.perf_counter()
fp_frequent_itemsets = fpgrowth(df, min_support=0.3, use_colnames=True)
end_fp = time.perf_counter()

print("Frequent Itemsets (FP-Growth):")
print(fp_frequent_itemsets)
print(f"\nExecution Time (FP-Growth): {end_fp - start_fp:.6f} seconds")

In [None]:
# Run Apriori Algorithm
# Timing uses time.perf_counter() instead of time.time(): it is a monotonic,
# high-resolution clock intended for measuring short durations, whereas the
# wall clock can be coarse or be adjusted while the cell runs.
# start_ap / end_ap are only meaningful as a difference (end_ap - start_ap).
start_ap = time.perf_counter()
ap_frequent_itemsets = apriori(df, min_support=0.3, use_colnames=True)
end_ap = time.perf_counter()

print("Frequent Itemsets (Apriori):")
print(ap_frequent_itemsets)
print(f"\nExecution Time (Apriori): {end_ap - start_ap:.6f} seconds")

In [None]:
# Association Rules (Optional)
# Derive association rules from each algorithm's frequent itemsets, using
# confidence as the metric with a minimum threshold of 0.7, and print each
# result under its own heading.
rules_fp = association_rules(
    fp_frequent_itemsets,
    metric="confidence",
    min_threshold=0.7,
)
print("FP-Growth Association Rules:", rules_fp, sep="\n")

rules_ap = association_rules(
    ap_frequent_itemsets,
    metric="confidence",
    min_threshold=0.7,
)
print("Apriori Association Rules:", rules_ap, sep="\n")

In [None]:
# Performance Comparison
# Report the elapsed time of the FP-Growth and Apriori runs (from the start_*/end_*
# timestamps recorded in their cells) and print a verdict for whichever was shorter.
# NOTE: each duration comes from a single run, so the verdict can vary between executions.
print(
    "---------- Performance Comparison ----------",
    f"FP-Growth Time: {end_fp - start_fp:.6f} seconds",
    f"Apriori Time:   {end_ap - start_ap:.6f} seconds",
    sep="\n",
)

# Apriori's message is shown unless FP-Growth was strictly faster (ties go to Apriori).
if (end_ap - start_ap) <= (end_fp - start_fp):
    print("\n🔍 Apriori might be better for very small datasets but is slower as data grows.")
else:
    print("\n✅ FP-Growth is faster and more scalable for large datasets.")